add example code to README

Danceiny · Mar 28, 2019 · d4f63ce · d4f63ce
1 parent 629e677
commit d4f63ce
Showing 1 changed file with 150 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -17,8 +17,157 @@
     >`pip install -U parser_engine`
     
 ### 示例
+- 极简版，使用`CrawlSpider`的rules机制。
+```python
+from parser_engine import TemplateAnnotation
+@TemplateAnnotation(tpls="demo")
+class DemoSpider4(CrawlSpider):
+    name = "demo4"
+    start_urls = [
+        "http://github.cannot.cc/baixing-helper"
+    ]
+```
+
+- 使用scrapy_redis，解析start_urls的响应。
+```python
+from parser_engine import TemplateAnnotation
+from parser_engine.clue.spider import ClueSpider
+@TemplateAnnotation(start_url_tpl=({
+    "name": "zhongguozhongqi_xiaoshouwangluo",
+    "itemname": "HuocheDealerItem",
+    "parent": {
+        "xpath": "//tr[@class=\"bgcolor2\"]"
+    },
+    "fields": [
+        {
+            "key": "area",
+            "xpath": "td[1]/text()",
+            "value_type": "stripped_string"
+        }, {
+            "key": "leads_name",
+            "xpath": "td[2]/text()",
+            "value_type": "stripped_string"
+        }, {
+            "key": "address",
+            "xpath": "td[3]/text()",
+            "value_type": "stripped_string"
+        }, {
+            "key": "phone",
+            "xpath": "td[5]/text()",
+            "value_type": "stripped_string"
+        }
+    ]
+}), channel='zhongguozhongqi', leads_src='中国重汽')
+class ZhongguozhongqiSpider(ClueSpider):
+    name = 'zhongguozhongqi'
+    def parse(self, response):
+        items = self._parse_start_url(response)
+        for item in items:
+            phone = item.get('phone')
+            if phone:
+                item['phone'] = phone.replace('、', ',')
+            yield item
+        self.finish_clue(response, len(items))
+```
+
+- 使用scrapy_redis，灵活运用多种PE特性。
+```python
+from parser_engine.clue.spider import ClueSpider
+from parser_engine import TemplateAnnotation
+from parser_engine.clue.items import ClueItem
+from parser_engine.request import TaskRequest
+from scrapy import Request
+@TemplateAnnotation(start_url_tpl=({
+                                       "name": "youka_shop_listing_api",
+                                       "parent": {
+                                           "json_key": "data",
+                                       },
+                                       "fields": [{
+                                           "key": "totalPage",
+                                           "json_key": "totalPage",
+
+                                       }, {
+                                           "key": "ids",
+                                           "json_path": "dataList[*].id"
+                                       }]
+                                   },),
+    tpls=({
+        "name": "youka_shop_detail_api",
+        "itemname": "HuocheDealerItem",
+        "parent": {
+            "json_key": "data",
+        },
+        "fields": [{
+            "key": "company_type",
+            "json_key": "category",
+            "mapper": {
+                1: "二手车直营店",
+                2: "4S店"
+            }
+        }, {
+            "key": "dealer_id",
+            "json_key": "id",
+            "required": 1,
+        }, {
+            "key": "leads_name",
+            "json_key": "shopName",
+        }, {
+            "key": "area",
+            "json_path": "districtDto.districtName",
+            "value_type": "singleton"
+        }, {
+            "key": "city",
+            "json_path": "cityDto.cityName",
+            "value_type": "singleton"
+        }, {
+            "key": "service_phone",
+            "default_value": "",
+        }, {
+            "key": "wechat",
+            "json_key": "wechat",
+        },  {
+            "key": "tags",
+            "json_key": "tags",
+            "join": ","
+        }]
+    }), channel='youka', leads_src='优卡')
+class YoukaSpider(ClueSpider):
+    name = 'youka'
+    custom_settings = {
+        'CONCURRENT_REQUESTS': 2,
+        'CONCURRENT_REQUESTS_PER_DOMAIN': 1
+    }
+    def parse(self, response):
+        items = self._parse_start_url(response)
+        meta = response.meta
+        clue_id = meta.get('clue_id')
+        from_url = response.request.url
+        if meta.get('open_pages'):
+            total_page = items[0]['totalPage']
+            import re
+            current_page = int(re.findall('page=(\\d+)', from_url)[0])
+            for i in range(1, total_page + 1):
+                if current_page == i:
+                    continue
+                url = "http://www.china2cv.com/truck-foton-web/api/shop/v1/getShopList?page=%d&pageSize=10" % i
+                yield ClueItem({"project": "huoche", "spider": self.name, "req": TaskRequest(
+                    url=url,
+                    meta={"from_clue_id": clue_id}
+                )})
+        for item in items:
+            for id in item['ids']:
+                r = Request(url="http://www.china2cv.com/truck-foton-web/api/shop/v1/getShopInfo?shopId=%d" % int(id),
+                            callback=self._response_downloaded)
+                r.meta.update(rule=0, from_clue_id=clue_id)
+                yield r
+
+    def process_results(self, response, results):
+        for item in results:
+            item['url'] = 'http://www.china2cv.com/storeDetail.html?typess=1&shopId=' + str(item['dealer_id'])
+        return results
+```
 
-请参考：[examples](./examples)。
+更多请参考：[examples](./examples)。
 
 ### 原理
 - 解析器