From f843c9f4c54139c7b759f226573564bc4cf083bf Mon Sep 17 00:00:00 2001 From: coderhxl Date: Fri, 17 Feb 2023 12:49:01 +0800 Subject: [PATCH] other --- README.md | 55 ++++++++++++++++++++++++++++------- document/cn.md | 53 +++++++++++++++++++++++++++------- package.json | 2 +- publish/README.md | 55 ++++++++++++++++++++++++++++------- publish/package.json | 2 +- test/start/index.js | 2 +- test/start/index.ts | 68 +++++++++++++++++--------------------------- 7 files changed, 160 insertions(+), 77 deletions(-) diff --git a/README.md b/README.md index 0ed27da..bf7cc8f 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,8 @@ XCrawl is a Nodejs multifunctional crawler library. - Crawl HTML, JSON, file resources, etc. with simple configuration - Use the JSDOM library to parse HTML, or parse HTML by yourself -- Optional mode asynchronous/synchronous for batch requests +- The request method supports asynchronous/synchronous +- Support Promise/Callback - Polling function - Anthropomorphic request interval - Written in TypeScript @@ -47,6 +48,7 @@ XCrawl is a Nodejs multifunctional crawler library. * [IFetchFileConfig](#IFetchFileConfig) * [IFetchPollingConfig](#IFetchPollingConfig) * [IFetchCommon](#IFetchCommon) + * [IFetchCommonArr](#IFetchCommonArr) * [IFileInfo](#IFileInfo) * [IFetchHTML](#IFetchHTML) - [More](#More) @@ -92,10 +94,26 @@ Create a crawler instance via new XCrawl. 
The request queue is maintained by the ```ts class XCrawl { constructor(baseConfig?: IXCrawlBaseConifg) - fetchHTML(config: IFetchHTMLConfig): Promise - fetchData(config: IFetchDataConfig): Promise> - fetchFile(config: IFetchFileConfig): Promise> - fetchPolling(config: IFetchPollingConfig, callback: (count: number) => void): void + + fetchHTML( + config: IFetchHTMLConfig, + callback?: (res: IFetchHTML) => void + ): Promise + + fetchData( + config: IFetchDataConfig, + callback?: (res: IFetchCommon) => void + ): Promise> + + fetchFile( + config: IFetchFileConfig, + callback?: (res: IFetchCommon) => void + ): Promise> + + fetchPolling( + config: IFetchPollingConfig, + callback: (count: number) => void + ): void } ``` @@ -142,7 +160,10 @@ fetchHTML is the method of the above [myXCrawl](https://github.com/coder-hxl/x-c #### Type ```ts -function fetchHTML(config: IFetchHTMLConfig): Promise +fetchHTML( + config: IFetchHTMLConfig, + callback?: (res: IFetchHTML) => void +): Promise ``` #### Example @@ -161,7 +182,10 @@ fetchData is the method of the above [myXCrawl](#Example-1) instance, which is u #### Type ```ts -function fetchData(config: IFetchDataConfig): Promise> +fetchData( + config: IFetchDataConfig, + callback?: (res: IFetchCommon) => void +): Promise> ``` #### Example @@ -188,7 +212,10 @@ fetchFile is the method of the above [myXCrawl](#Example-1) instance, which is u #### Type ```ts -function fetchFile(config: IFetchFileConfig): Promise> +fetchFile( + config: IFetchFileConfig, + callback?: (res: IFetchCommon) => void +): Promise> ``` #### Example @@ -331,12 +358,18 @@ interface IFetchPollingConfig { ### IFetchCommon ```ts -type IFetchCommon = { +interface IFetchCommon { id: number statusCode: number | undefined - headers: IncomingHttpHeaders // node:http type + headers: IncomingHttpHeaders // node:http type data: T -}[] +} +``` + +### IFetchCommonArr + +```ts +type IFetchCommonArr = IFetchCommon[] ``` ### IFileInfo diff --git a/document/cn.md b/document/cn.md index 
f9dbdc5..ccd3763 100644 --- a/document/cn.md +++ b/document/cn.md @@ -8,7 +8,8 @@ XCrawl 是 Nodejs 多功能爬虫库。 - 只需简单的配置即可抓取 HTML 、JSON、文件资源等等 - 使用 JSDOM 库对 HTML 解析,也可自行解析 HTML -- 批量请求时可选择模式 异步/同步 +- 请求方式支持 异步/同步 +- 支持 Promise/Callback - 轮询功能 - 拟人化的请求间隔时间 - 使用 TypeScript 编写 @@ -47,6 +48,7 @@ XCrawl 是 Nodejs 多功能爬虫库。 * [IFetchFileConfig](#IFetchFileConfig) * [IFetchPollingConfig](#IFetchPollingConfig) * [IFetchCommon](#IFetchCommon) + * [IFetchCommonArr](#IFetchCommonArr) * [IFileInfo](#IFileInfo) * [IFetchHTML](#IFetchHTML) - [更多](#更多) @@ -104,10 +106,26 @@ myXCrawl.fetchPolling({ d: 1 }, () => { ```ts class XCrawl { constructor(baseConfig?: IXCrawlBaseConifg) - fetchHTML(config: IFetchHTMLConfig): Promise - fetchData(config: IFetchDataConfig): Promise> - fetchFile(config: IFetchFileConfig): Promise> - fetchPolling(config: IFetchPollingConfig, callback: (count: number) => void): void + + fetchHTML( + config: IFetchHTMLConfig, + callback?: (res: IFetchHTML) => void + ): Promise + + fetchData( + config: IFetchDataConfig, + callback?: (res: IFetchCommon) => void + ): Promise> + + fetchFile( + config: IFetchFileConfig, + callback?: (res: IFetchCommon) => void + ): Promise> + + fetchPolling( + config: IFetchPollingConfig, + callback: (count: number) => void + ): void } ``` @@ -154,7 +172,10 @@ fetchHTML 是 [myXCrawl](https://github.com/coder-hxl/x-crawl/blob/main/document #### 类型 ```ts -function fetchHTML(config: IFetchHTMLConfig): Promise +fetchHTML( + config: IFetchHTMLConfig, + callback?: (res: IFetchHTML) => void +): Promise ``` #### 示例 @@ -173,7 +194,10 @@ fetch 是 [myXCrawl](#示例-1) 实例的方法,通常用于爬取 API ,可 #### 类型 ```ts -function fetchData(config: IFetchDataConfig): Promise> +fetchData( + config: IFetchDataConfig, + callback?: (res: IFetchCommon) => void +): Promise> ``` #### 示例 @@ -200,7 +224,10 @@ fetchFile 是 [myXCrawl](#示例-1) 实例的方法,通常用于爬取文件 #### 类型 ```ts -function fetchFile(config: IFetchFileConfig): Promise> +fetchFile( + config: IFetchFileConfig, + callback?: (res: 
IFetchCommon) => void +): Promise> ``` #### 示例 @@ -343,12 +370,18 @@ interface IFetchPollingConfig { ### IFetchCommon ```ts -type IFetchCommon = { +interface IFetchCommon { id: number statusCode: number | undefined headers: IncomingHttpHeaders // node:http 类型 data: T -}[] +} +``` + +### IFetchCommonArr + +```ts +type IFetchCommonArr = IFetchCommon[] ``` ### IFileInfo diff --git a/package.json b/package.json index 5967a80..b179186 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "private": true, "name": "x-crawl", - "version": "0.4.0", + "version": "1.0.0", "author": "CoderHxl", "description": "XCrawl is a Nodejs multifunctional crawler library.", "license": "MIT", diff --git a/publish/README.md b/publish/README.md index 0ed27da..bf7cc8f 100644 --- a/publish/README.md +++ b/publish/README.md @@ -8,7 +8,8 @@ XCrawl is a Nodejs multifunctional crawler library. - Crawl HTML, JSON, file resources, etc. with simple configuration - Use the JSDOM library to parse HTML, or parse HTML by yourself -- Optional mode asynchronous/synchronous for batch requests +- The request method supports asynchronous/synchronous +- Support Promise/Callback - Polling function - Anthropomorphic request interval - Written in TypeScript @@ -47,6 +48,7 @@ XCrawl is a Nodejs multifunctional crawler library. * [IFetchFileConfig](#IFetchFileConfig) * [IFetchPollingConfig](#IFetchPollingConfig) * [IFetchCommon](#IFetchCommon) + * [IFetchCommonArr](#IFetchCommonArr) * [IFileInfo](#IFileInfo) * [IFetchHTML](#IFetchHTML) - [More](#More) @@ -92,10 +94,26 @@ Create a crawler instance via new XCrawl. 
The request queue is maintained by the ```ts class XCrawl { constructor(baseConfig?: IXCrawlBaseConifg) - fetchHTML(config: IFetchHTMLConfig): Promise - fetchData(config: IFetchDataConfig): Promise> - fetchFile(config: IFetchFileConfig): Promise> - fetchPolling(config: IFetchPollingConfig, callback: (count: number) => void): void + + fetchHTML( + config: IFetchHTMLConfig, + callback?: (res: IFetchHTML) => void + ): Promise + + fetchData( + config: IFetchDataConfig, + callback?: (res: IFetchCommon) => void + ): Promise> + + fetchFile( + config: IFetchFileConfig, + callback?: (res: IFetchCommon) => void + ): Promise> + + fetchPolling( + config: IFetchPollingConfig, + callback: (count: number) => void + ): void } ``` @@ -142,7 +160,10 @@ fetchHTML is the method of the above [myXCrawl](https://github.com/coder-hxl/x-c #### Type ```ts -function fetchHTML(config: IFetchHTMLConfig): Promise +fetchHTML( + config: IFetchHTMLConfig, + callback?: (res: IFetchHTML) => void +): Promise ``` #### Example @@ -161,7 +182,10 @@ fetchData is the method of the above [myXCrawl](#Example-1) instance, which is u #### Type ```ts -function fetchData(config: IFetchDataConfig): Promise> +fetchData( + config: IFetchDataConfig, + callback?: (res: IFetchCommon) => void +): Promise> ``` #### Example @@ -188,7 +212,10 @@ fetchFile is the method of the above [myXCrawl](#Example-1) instance, which is u #### Type ```ts -function fetchFile(config: IFetchFileConfig): Promise> +fetchFile( + config: IFetchFileConfig, + callback?: (res: IFetchCommon) => void +): Promise> ``` #### Example @@ -331,12 +358,18 @@ interface IFetchPollingConfig { ### IFetchCommon ```ts -type IFetchCommon = { +interface IFetchCommon { id: number statusCode: number | undefined - headers: IncomingHttpHeaders // node:http type + headers: IncomingHttpHeaders // node:http type data: T -}[] +} +``` + +### IFetchCommonArr + +```ts +type IFetchCommonArr = IFetchCommon[] ``` ### IFileInfo diff --git a/publish/package.json 
b/publish/package.json index c661115..5b00866 100644 --- a/publish/package.json +++ b/publish/package.json @@ -1,6 +1,6 @@ { "name": "x-crawl", - "version": "0.4.0", + "version": "1.0.0", "author": "CoderHxl", "description": "XCrawl is a Nodejs multifunctional crawler library.", "license": "MIT", diff --git a/test/start/index.js b/test/start/index.js index 4b8bb0a..199e1f7 100644 --- a/test/start/index.js +++ b/test/start/index.js @@ -1 +1 @@ -"use strict";var e=require("node:path"),t=require("node:fs"),o=require("jsdom"),n=require("node:http"),r=require("node:https"),s=require("node:url"),a=require("https-proxy-agent"),i=require("chalk");const c=console.log,u=i.hex("#a57fff"),l=i.green,h=i.red,f=i.yellow;function d(e){return void 0===e}function g(e){return"number"==typeof e}function m(e){return Array.isArray(e)}function p(e,t){let o=e?`${e}`:"?";if(t)for(const e in t){o+=`&${e}=${t[e]}`}else o=e;return o}function y(e){const{protocol:t,hostname:o,port:i,pathname:c,search:u}=new s.URL(e.url),l="http:"===t,h={agent:e.proxy?a(e.proxy):l?new n.Agent:new r.Agent,protocol:t,hostname:o,port:i,path:c,search:p(u,e.params),method:e.method?.toLocaleUpperCase()??"GET",headers:{},timeout:e.timeout};return h.headers=function(e,t){const o={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",...e.headers??{}};return"POST"===t.method&&e.data&&(o["Content-Type"]="application/json",o["Content-Length"]=Buffer.byteLength(e.data)),o}(e,h),h}function q(e){return new Promise(((t,o)=>{const s=d(e.data);e.data=s?e.data:JSON.stringify(e.data);const a=y(e);function i(e){const{statusCode:o,headers:n}=e,r=[];e.on("data",(e=>r.push(e))),e.on("end",(()=>{const e=Buffer.concat(r);t({statusCode:o,headers:n,data:e})}))}let c;c="http:"===a.protocol?n.request(a,i):r.request(a,i),c.on("timeout",(()=>{o(new Error(`Timeout ${e.timeout}ms`))})),c.on("error",(e=>{o(e)})),"POST"!==a.method||s||c.write(e.data),c.end()}))}async 
function w(e,t,o,n){if(e&&n>1){const e=t?o:function(e,t=0){let o=Math.floor(Math.random()*e);for(;osetTimeout(t,e)))}(e)}else c(`Request ${u(n)} does not need to sleep, send immediately`)}const $=new class{baseConfig;constructor(e={}){this.baseConfig=e}mergeConfig(e){const t=this.baseConfig,o=structuredClone(e),n=m(o.requestConifg)?o.requestConifg:[o.requestConifg];for(const e of n){const{url:o,timeout:n,proxy:r}=e;d(t.baseUrl)||(e.url=t.baseUrl+o),d(n)&&(e.timeout=t.timeout),d(r)&&(e.proxy=t.proxy)}return d(o.intervalTime)&&(o.intervalTime=t.intervalTime),o}async useBatchRequestByMode(e,t){const o=m(e)?e:[e];let n=[];return n="sync"!==this.baseConfig.mode?await async function(e,t){const o=!d(t),n=g(t);c(`Begin execution, mode: async, total: ${u(e.length)} `);const r=[];let s=0;for(const a of e){const e=++s;await w(o,n,t,e);const i=q(a).catch((t=>`Request ${e} is an error: ${t.message}`)).then((t=>"string"==typeof t?t:{id:e,...t}));r.push(i)}c(l("All requests have been sent!"));const a=await Promise.all(r),i=[],f=[];return a.forEach((e=>{if("string"==typeof e)return f.push(e);i.push(e)})),f.forEach((e=>c(h(e)))),c(`requestsTotal: ${u(e.length)}, success: ${l(i.length)}, error: ${h(f.length)}`),i}(o,t):await async function(e,t){const o=!d(t),n=g(t);c(`Begin execution, mode: sync, total: ${u(e.length)} `);let r=0,s=0,a=0;const i=[];for(const f of e){r++,await w(o,n,t,r);try{const e=await q(f);i.push({id:r,...e}),c(l(`Request ${u(r)} is an success`)),s++}catch(e){c(h(`Request ${r} is an error: ${e.message}`)),a++}}return c(l("All requests are over!")),c(`requestsTotal: ${u(e.length)}, success: ${l(s)}, error: ${h(a)}`),i}(o,t),n}async fetchHTML(e){const{requestConifg:t}=this.mergeConfig({requestConifg:(n=e,"string"==typeof n?{url:e}:e)});var n;const r=await q(t),s=r.data.toString();return{...r,data:{html:s,jsdom:new o.JSDOM(s)}}}async fetchData(e){const{requestConifg:t,intervalTime:o}=this.mergeConfig(e),n=await this.useBatchRequestByMode(t,o),r=[];return 
n.forEach((e=>{const t=e.headers["content-type"]??"",o=e.data,n=t.includes("text")?o.toString():JSON.parse(o.toString());r.push({...e,data:n})})),r}async fetchFile(o){const{requestConifg:n,intervalTime:r,fileConfig:s}=this.mergeConfig(o),a=await this.useBatchRequestByMode(n,r),i=[];a.forEach((o=>{const{id:n,headers:r,data:a}=o,u=r["content-type"]??"",l=s.extension??u.split("/").pop(),f=(new Date).getTime().toString(),d=e.resolve(s.storeDir,`${f}.${l}`);try{t.writeFileSync(d,a),i.push({...o,data:{fileName:f,mimeType:u,size:a.length,filePath:d}})}catch(e){c(h(`File save error at id ${n}: ${e.message}`))}}));const f=a.length,d=i.length,g=f-d;return c(`saveTotal: ${u(f)}, success: ${l(d)}, error: ${h(g)}`),i}fetchPolling(e,t){const{Y:o,M:n,d:r,h:s,m:a}=e,i=(d(o)?0:1e3*o*60*60*24*365)+(d(n)?0:1e3*n*60*60*24*30)+(d(r)?0:1e3*r*60*60*24)+(d(s)?0:1e3*s*60*60)+(d(a)?0:1e3*a*60);let c=0;function u(){console.log(f(`Start the ${f.bold(++c)} polling`)),t(c)}u(),setInterval(u,i)}}({timeout:1e4,intervalTime:{max:2e3,min:1e3},mode:"async"});$.fetchHTML({url:"https://www.google.com.hk/",proxy:"http://127.0.0.1:14892"}).then((t=>{console.log(t.statusCode);const{jsdom:o}=t.data,n=o.window.document.querySelector(".lnXdpd");$.fetchFile({requestConifg:{url:"https://www.google.com.hk/"+n.src,proxy:"http://127.0.0.1:14892"},fileConfig:{storeDir:e.resolve(__dirname,"./upload"),extension:"jpg"}})})); +"use strict";var e=require("node:path"),t=require("node:fs"),o=require("jsdom"),n=require("node:http"),s=require("node:https"),r=require("node:url"),a=require("https-proxy-agent"),i=require("chalk");const c=console.log,u=i.hex("#a57fff"),l=i.green,h=i.red,f=i.yellow;function d(e){return void 0===e}function m(e){return"number"==typeof e}function g(e){return Array.isArray(e)}function p(e,t){let o=e?`${e}`:"?";if(t)for(const e in t){o+=`&${e}=${t[e]}`}else o=e;return o}function y(e){const{protocol:t,hostname:o,port:i,pathname:c,search:u}=new 
r.URL(e.url),l="http:"===t,h={agent:e.proxy?a(e.proxy):l?new n.Agent:new s.Agent,protocol:t,hostname:o,port:i,path:c,search:p(u,e.params),method:e.method?.toLocaleUpperCase()??"GET",headers:{},timeout:e.timeout};return h.headers=function(e,t){const o={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",...e.headers??{}};return"POST"===t.method&&e.data&&(o["Content-Type"]="application/json",o["Content-Length"]=Buffer.byteLength(e.data)),o}(e,h),h}function q(e){return new Promise(((t,o)=>{const r=d(e.data);e.data=r?e.data:JSON.stringify(e.data);const a=y(e);function i(e){const{statusCode:o,headers:n}=e,s=[];e.on("data",(e=>s.push(e))),e.on("end",(()=>{const e=Buffer.concat(s);t({statusCode:o,headers:n,data:e})}))}let c;c="http:"===a.protocol?n.request(a,i):s.request(a,i),c.on("timeout",(()=>{o(new Error(`Timeout ${e.timeout}ms`))})),c.on("error",(e=>{o(e)})),"POST"!==a.method||r||c.write(e.data),c.end()}))}async function $(e,t,o,n){if(e&&n>1){const e=t?o:function(e,t=0){let o=Math.floor(Math.random()*e);for(;osetTimeout(t,e)))}(e)}else c(`Request ${u(n)} does not need to sleep, send immediately`)}const w=new class{baseConfig;constructor(e={}){this.baseConfig=e}mergeConfig(e){const t=this.baseConfig,o=structuredClone(e),n=g(o.requestConifg)?o.requestConifg:[o.requestConifg];for(const e of n){const{url:o,timeout:n,proxy:s}=e;d(t.baseUrl)||(e.url=t.baseUrl+o),d(n)&&(e.timeout=t.timeout),d(s)&&(e.proxy=t.proxy)}return d(o.intervalTime)&&(o.intervalTime=t.intervalTime),o}async useBatchRequestByMode(e,t,o){const n=g(e)?e:[e];"sync"!==this.baseConfig.mode?await async function(e,t,o){const n=!d(t),s=m(t);c(`Begin execution, mode: async, total: ${u(e.length)} `);let r=0,a=0,i=0;const f=[];for(const c of e){const e=++r;await $(n,s,t,e);const u=q(c).catch((t=>(i++,`Request ${e} is an error: ${t.message}`))).then((t=>{if("string"==typeof t)return t;a++,o({id:e,...t})}));f.push(u)}c(l("All requests have 
been sent!")),(await Promise.all(f)).forEach((e=>e?c(h(e)):"")),c(`requestsTotal: ${u(e.length)}, success: ${l(a)}, error: ${h(i)}`)}(n,t,o):await async function(e,t,o){const n=!d(t),s=m(t);c(`Begin execution, mode: sync, total: ${u(e.length)} `);let r=0,a=0,i=0;for(const f of e){r++,await $(n,s,t,r);let e=!0,d=null;try{d={id:r,...await q(f)},c(l(`Request ${u(r)} is an success`)),a++}catch(t){e=!1,c(h(`Request ${r} is an error: ${t.message}`)),i++}e&&o&&o(d)}c(l("All requests are over!")),c(`requestsTotal: ${u(e.length)}, success: ${l(a)}, error: ${h(i)}`)}(n,t,o)}async fetchHTML(e,t){const{requestConifg:n}=this.mergeConfig({requestConifg:(s=e,"string"==typeof s?{url:e}:e)});var s;const r=await q(n),a=r.data.toString(),i={...r,data:{html:a,jsdom:new o.JSDOM(a)}};return t&&t(i),i}async fetchData(e,t){const{requestConifg:o,intervalTime:n}=this.mergeConfig(e),s=[];return await this.useBatchRequestByMode(o,n,(function(e){const o=e.headers["content-type"]??"",n=e.data,r=o.includes("text")?n.toString():JSON.parse(n.toString()),a={...e,data:r};t&&t(a),s.push(a)})),s}async fetchFile(o,n){const{requestConifg:s,intervalTime:r,fileConfig:a}=this.mergeConfig(o),i=[];await this.useBatchRequestByMode(s,r,(function(o){const{id:s,headers:r,data:u}=o,l=r["content-type"]??"",f=a.extension??l.split("/").pop(),d=(new Date).getTime().toString(),m=e.resolve(a.storeDir,`${d}.${f}`);try{t.writeFileSync(m,u);const e={...o,data:{fileName:d,mimeType:l,size:u.length,filePath:m}};n&&n(e),i.push(e)}catch(e){c(h(`File save error at id ${s}: ${e.message}`))}}));const f=g(s)?s.length:1,d=i.length,m=f-d;return c(`saveTotal: ${u(f)}, success: ${l(d)}, error: ${h(m)}`),i}fetchPolling(e,t){const{Y:o,M:n,d:s,h:r,m:a}=e,i=(d(o)?0:1e3*o*60*60*24*365)+(d(n)?0:1e3*n*60*60*24*30)+(d(s)?0:1e3*s*60*60*24)+(d(r)?0:1e3*r*60*60)+(d(a)?0:1e3*a*60);let c=0;function u(){console.log(f(`Start the ${f.bold(++c)} 
polling`)),t(c)}u(),setInterval(u,i)}}({timeout:1e4,intervalTime:{max:2e3,min:1e3},mode:"async"});w.fetchPolling({m:3},(()=>{w.fetchHTML("https://www.bilibili.com/guochuang/",(e=>{console.log("fetchHTML Callback: ",e.statusCode)})).then((t=>{const{jsdom:o}=t.data,n=[];o.window.document.querySelectorAll(".chief-recom-item").forEach((e=>n.push(e.querySelector("img").src)));const s=n.map((e=>({url:`https:${e}`})));s.pop(),w.fetchFile({requestConifg:s,fileConfig:{storeDir:e.resolve(__dirname,"./upload")}},(e=>{console.log(e.id,e.statusCode,e.data.fileName)}))}))})); diff --git a/test/start/index.ts b/test/start/index.ts index acb072c..f42683c 100644 --- a/test/start/index.ts +++ b/test/start/index.ts @@ -17,46 +17,30 @@ const testXCrawl = new XCrawl({ // ] // }) -// testXCrawl.fetchPolling({ m: 3 }, () => { -// testXCrawl.fetchHTML('https://www.bilibili.com/guochuang/').then((res) => { -// const { jsdom } = res.data - -// const imgSrc: string[] = [] -// const recomEls = jsdom.window.document.querySelectorAll('.chief-recom-item') -// recomEls.forEach((item) => imgSrc.push(item.querySelector('img')!.src)) - -// const requestConifg = imgSrc.map((src) => ({ url: `https:${src}` })) -// requestConifg.pop() - -// testXCrawl.fetchFile({ -// requestConifg, -// fileConfig: { storeDir: path.resolve(__dirname, './upload') } -// }) -// }) -// }) - -// 'http://127.0.0.1:14892' -testXCrawl - .fetchHTML({ - url: 'https://www.google.com.hk/', - proxy: 'http://127.0.0.1:14892' - }) - .then((res) => { - console.log(res.statusCode) - - const { jsdom } = res.data - - const imgEl = - jsdom.window.document.querySelector('.lnXdpd') - - testXCrawl.fetchFile({ - requestConifg: { - url: 'https://www.google.com.hk/' + imgEl!.src, - proxy: 'http://127.0.0.1:14892' - }, - fileConfig: { - storeDir: path.resolve(__dirname, './upload'), - extension: 'jpg' - } +testXCrawl.fetchPolling({ m: 3 }, () => { + testXCrawl + .fetchHTML('https://www.bilibili.com/guochuang/', (res) => { + console.log('fetchHTML 
Callback: ', res.statusCode) }) - }) + .then((res) => { + const { jsdom } = res.data + + const imgSrc: string[] = [] + const recomEls = + jsdom.window.document.querySelectorAll('.chief-recom-item') + recomEls.forEach((item) => imgSrc.push(item.querySelector('img')!.src)) + + const requestConifg = imgSrc.map((src) => ({ url: `https:${src}` })) + requestConifg.pop() + + testXCrawl.fetchFile( + { + requestConifg, + fileConfig: { storeDir: path.resolve(__dirname, './upload') } + }, + (res) => { + console.log(res.id, res.statusCode, res.data.fileName) + } + ) + }) +})