From 629e677fe7c1807c6fac284ead9eb1cf73b626b7 Mon Sep 17 00:00:00 2001
From: Danceiny <danceiny@gmail.com>
Date: Thu, 28 Mar 2019 23:43:53 +0800
Subject: [PATCH] update examples

---
 README.md                                     |   9 +-
 VERSION                                       |   2 +-
 demo/demo/items.py                            |  81 ------------
 demo/demo/middlewares.py                      | 103 ---------------
 demo/demo/pipelines.py                        |  76 -----------
 demo/demo/settings.py                         | 108 ----------------
 examples/README.md                            |  41 ++++++
 {demo => examples}/demo/__init__.py           |   0
 examples/demo/demo/__init__.py                |   0
 examples/demo/demo/items.py                   |  40 ++++++
 examples/demo/demo/pipelines.py               |   5 +
 examples/demo/demo/settings.py                |  15 +++
 .../demo}/demo/spiders/__init__.py            |   0
 .../demo}/demo/spiders/demo_spider.py         |  10 +-
 .../demo}/demo/spiders/gaode_spider.py        |  19 +--
 {demo => examples/demo}/parser_engine2.json   |   2 +-
 {demo => examples/demo}/scrapy.cfg            |   1 -
 examples/huoche/huoche/__init__.py            |   0
 examples/huoche/huoche/items.py               |  64 ++++++++++
 examples/huoche/huoche/logger.py              |  30 +++++
 examples/huoche/huoche/pipelines.py           |  50 ++++++++
 examples/huoche/huoche/settings.py            |  38 ++++++
 examples/huoche/huoche/spiders/__init__.py    |   4 +
 .../huoche/huoche/spiders/dongfeng_spider.py  |  45 +++++++
 .../huoche/huoche/spiders/futian_spider.py    |  21 ++++
 .../huoche/spiders/kachezhijia_spider.py      |  95 ++++++++++++++
 examples/huoche/huoche/spiders/sxqc_spider.py |  31 +++++
 .../huoche/spiders/yiqijiefang_spider.py      |  50 ++++++++
 examples/huoche/huoche/spiders/youka.py       | 118 ++++++++++++++++++
 .../huoche/huoche/spiders/zhongguozhongqi.py  |  44 +++++++
 examples/huoche/requirements.txt              |   5 +
 parser_engine/clue/pipelines.py               |   7 +-
 parser_engine/decorator.py                    |  20 ++-
 parser_engine/request.py                      |  32 ++---
 parser_engine/spider.py                       |   9 +-
 35 files changed, 742 insertions(+), 433 deletions(-)
 delete mode 100644 demo/demo/items.py
 delete mode 100644 demo/demo/middlewares.py
 delete mode 100644 demo/demo/pipelines.py
 delete mode 100644 demo/demo/settings.py
 create mode 100644 examples/README.md
 rename {demo => examples}/demo/__init__.py (100%)
 create mode 100644 examples/demo/demo/__init__.py
 create mode 100644 examples/demo/demo/items.py
 create mode 100644 examples/demo/demo/pipelines.py
 create mode 100644 examples/demo/demo/settings.py
 rename {demo => examples/demo}/demo/spiders/__init__.py (100%)
 rename {demo => examples/demo}/demo/spiders/demo_spider.py (80%)
 rename {demo => examples/demo}/demo/spiders/gaode_spider.py (60%)
 rename {demo => examples/demo}/parser_engine2.json (98%)
 rename {demo => examples/demo}/scrapy.cfg (88%)
 create mode 100644 examples/huoche/huoche/__init__.py
 create mode 100644 examples/huoche/huoche/items.py
 create mode 100644 examples/huoche/huoche/logger.py
 create mode 100644 examples/huoche/huoche/pipelines.py
 create mode 100644 examples/huoche/huoche/settings.py
 create mode 100644 examples/huoche/huoche/spiders/__init__.py
 create mode 100644 examples/huoche/huoche/spiders/dongfeng_spider.py
 create mode 100644 examples/huoche/huoche/spiders/futian_spider.py
 create mode 100644 examples/huoche/huoche/spiders/kachezhijia_spider.py
 create mode 100644 examples/huoche/huoche/spiders/sxqc_spider.py
 create mode 100644 examples/huoche/huoche/spiders/yiqijiefang_spider.py
 create mode 100644 examples/huoche/huoche/spiders/youka.py
 create mode 100644 examples/huoche/huoche/spiders/zhongguozhongqi.py
 create mode 100644 examples/huoche/requirements.txt

diff --git a/README.md b/README.md
index 41c4a8e..f29676f 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,10 @@
 - 安装稳定版：
     >`pip install -U parser_engine`
     
+### 示例
+
+请参考：[examples](./examples)。
+
 ### 原理
 - 解析器
     >PE向调用方提供一套简单、易懂的参数，实际会将其`编译`成较为复杂的xpath表达式，再借助scrapy封装的解析器将所需内容提取出来。
@@ -41,8 +45,6 @@
     >一个简单的需求场景：API返回的性别字段是0和1，但是需要将其转换成"男"和"女"。
     
 ### 待做清单
-- 功能
-
 - 优化
     - [ ] 支持直接在`Item`的类定义中定义模板
         >用法示例：原模板的`itemname`参数通过注解传参，其他的模板参数定义在`Item`类中，如下所示。
@@ -124,9 +126,6 @@ TemplateAnnotation注解中传进来的参数，除了下面列出的，其他
     
 - tpls: 模板的数组，或者模板id的数组
 
-其它约定：
-- Spider类的`name`类变量，会被翻译成`business`赋值给item。
-
 具体请参考[decorator.py](./parser_engine/decorator.py)中的注释及源代码。
 
 #### Html格式
diff --git a/VERSION b/VERSION
index 6da28dd..8294c18 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.1.1
\ No newline at end of file
+0.1.2
\ No newline at end of file
diff --git a/demo/demo/items.py b/demo/demo/items.py
deleted file mode 100644
index 8c56a00..0000000
--- a/demo/demo/items.py
+++ /dev/null
@@ -1,81 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Define here the models for your scraped items
-#
-# See documentation in:
-# https://doc.scrapy.org/en/latest/topics/items.html
-
-from scrapy.item import Item, Field
-from peewee import Model, MySQLDatabase, PrimaryKeyField, CharField, IntegerField
-
-db = None
-
-
-# db = PostgresqlDatabase("bxdw", host='bi.baixing.com', port=35432, user="biz_user", passwd="biz_user", charset="utf8")
-def init_db(datasource):
-    global db
-    if not db:
-        from scrapy.utils import project
-        settings = project.get_project_settings()
-        if datasource == 'mysql':
-            db = MySQLDatabase(database=settings.get("MYSQL_DATABASE"),
-                               host=settings.get("MYSQL_HOST"),
-                               user=settings.get("MYSQL_USER"),
-                               passwd=settings.get("MYSQL_PASSWORD"),
-                               port=3306, charset="utf8")
-    return db
-
-
-class BaseItem(Item):
-    channel_id = Field()
-    channel = Field()
-    created_time = Field()
-
-
-class DemoItem(BaseItem):
-    # define the fields for your item here like:
-    name = Field()
-    text = Field()
-    author = Field()
-    steps = Field()
-
-
-class ClueItem(Item):
-    channel = Field()
-    name = Field()
-    index = Field()
-    url = Field()
-    from_url = Field()
-    status = Field()
-    created_time = Field()
-    finished_time = Field()
-
-
-class LeadsItem(Item):
-    channel_id = Field()
-    channel = Field()
-    name = Field()
-    contact = Field()
-    contact_type = Field()
-    city = Field()
-    category = Field()
-    address = Field()
-    created_time = Field()
-    extra = Field()
-
-
-class Leads(Model):
-    id = PrimaryKeyField()
-    channel_id = CharField(verbose_name="渠道id", max_length=50, null=False, unique=True)
-    channel = CharField(verbose_name="渠道名称", null=False)
-    name = CharField(verbose_name="名称", null=False, unique=True)
-    contact = CharField(verbose_name="联系方式", null=False)
-    contact_type = CharField(verbose_name="联系方式类型", null=False)
-    city = CharField(verbose_name="城市")
-    category = CharField(verbose_name="类目/行业")
-    address = CharField(verbose_name="地址")
-    created_time = IntegerField(verbose_name="创建时间")
-    extra = CharField(verbose_name="附加信息")
-
-    class Meta:
-        database = init_db("mysql")
diff --git a/demo/demo/middlewares.py b/demo/demo/middlewares.py
deleted file mode 100644
index fd80448..0000000
--- a/demo/demo/middlewares.py
+++ /dev/null
@@ -1,103 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Define here the models for your spider middleware
-#
-# See documentation in:
-# https://doc.scrapy.org/en/latest/topics/spider-middleware.html
-
-from scrapy import signals
-
-
-class ProcessorSpiderMiddleware(object):
-    # Not all methods need to be defined. If a method is not defined,
-    # scrapy acts as if the spider middleware does not modify the
-    # passed objects.
-
-    @classmethod
-    def from_crawler(cls, crawler):
-        # This method is used by Scrapy to create your spiders.
-        s = cls()
-        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
-        return s
-
-    def process_spider_input(self, response, spider):
-        # Called for each response that goes through the spider
-        # middleware and into the spider.
-
-        # Should return None or raise an exception.
-        return None
-
-    def process_spider_output(self, response, result, spider):
-        # Called with the results returned from the Spider, after
-        # it has processed the response.
-
-        # Must return an iterable of Request, dict or Item objects.
-        for i in result:
-            yield i
-
-    def process_spider_exception(self, response, exception, spider):
-        # Called when a spider or process_spider_input() method
-        # (from other spider middleware) raises an exception.
-
-        # Should return either None or an iterable of Response, dict
-        # or Item objects.
-        pass
-
-    def process_start_requests(self, start_requests, spider):
-        # Called with the start requests of the spider, and works
-        # similarly to the process_spider_output() method, except
-        # that it doesn’t have a response associated.
-
-        # Must return only requests (not items).
-        for r in start_requests:
-            yield r
-
-    def spider_opened(self, spider):
-        spider.logger.info('Spider opened: %s' % spider.name)
-
-
-class ProcessorDownloaderMiddleware(object):
-    # Not all methods need to be defined. If a method is not defined,
-    # scrapy acts as if the downloader middleware does not modify the
-    # passed objects.
-
-    @classmethod
-    def from_crawler(cls, crawler):
-        # This method is used by Scrapy to create your spiders.
-        s = cls()
-        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
-        return s
-
-    def process_request(self, request, spider):
-        # Called for each request that goes through the downloader
-        # middleware.
-
-        # Must either:
-        # - return None: continue processing this request
-        # - or return a Response object
-        # - or return a Request object
-        # - or raise IgnoreRequest: process_exception() methods of
-        #   installed downloader middleware will be called
-        return None
-
-    def process_response(self, request, response, spider):
-        # Called with the response returned from the downloader.
-
-        # Must either;
-        # - return a Response object
-        # - return a Request object
-        # - or raise IgnoreRequest
-        return response
-
-    def process_exception(self, request, exception, spider):
-        # Called when a download handler or a process_request()
-        # (from other downloader middleware) raises an exception.
-
-        # Must either:
-        # - return None: continue processing this exception
-        # - return a Response object: stops process_exception() chain
-        # - return a Request object: stops process_exception() chain
-        pass
-
-    def spider_opened(self, spider):
-        spider.logger.info('Spider opened: %s' % spider.name)
diff --git a/demo/demo/pipelines.py b/demo/demo/pipelines.py
deleted file mode 100644
index a997c40..0000000
--- a/demo/demo/pipelines.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Define your item pipelines here
-#
-# Don't forget to add your pipeline to the ITEM_PIPELINES setting
-# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
-
-import pymongo
-import logging
-from .items import LeadsItem, Leads, DemoItem
-
-
-class DuplicatesPipeline(object):
-    def __init__(self):
-        self.leads_id_set = set()
-        self.leads_name_set = set()
-
-    def process_item(self, item, spider):
-        channel_id = item['channel_id']
-        name = item.get('name')
-        if channel_id in self.leads_id_set:
-            pass
-        if name in self.leads_name_set:
-            pass
-        self.leads_id_set.add(channel_id)
-        self.leads_name_set.add(name)
-        return item
-
-
-class MongoDBPipeline(object):
-    collection_name = 'leads'
-
-    def __init__(self, mongo_uri, mongo_db):
-        self.mongo_uri = mongo_uri
-        self.mongo_db = mongo_db
-
-    @classmethod
-    def from_crawler(cls, crawler):
-        return cls(
-            mongo_uri=crawler.settings.get('MONGO_URI'),
-            mongo_db=crawler.settings.get('MONGO_DATABASE')
-        )
-
-    def open_spider(self, spider):
-        self.client = pymongo.MongoClient(self.mongo_uri)
-        self.db = self.client[self.mongo_db]
-
-    def close_spider(self, spider):
-        self.client.close()
-
-    def process_item(self, item, spider):
-        self.db[self.collection_name].insert(dict(item))
-        logging.debug("item added to MongoDB")
-        return item
-
-
-class MySQLPipeline(object):
-    def process_item(self, item, spider):
-        if isinstance(item, LeadsItem):
-            if not Leads.table_exists():
-                Leads.create_table()
-            leads = Leads(
-                channel_id=item['channel_id'],
-                channel=item['channel'],
-                name=item['name'],
-                contact=item['contact'],
-                contact_type=item['contact_type'],
-                city=item['city'],
-                category=item['category'],
-                address=item['address'],
-                created_time=item['created_time'],
-                extra=item.get('extra', ''))
-            leads.save()
-            return item
-        elif isinstance(item, DemoItem):
-            print("pipeline get DemoItem", item)
diff --git a/demo/demo/settings.py b/demo/demo/settings.py
deleted file mode 100644
index 97ab155..0000000
--- a/demo/demo/settings.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Scrapy settings for demo project
-#
-# For simplicity, this file contains only settings considered important or
-# commonly used. You can find more settings consulting the documentation:
-#
-#     https://doc.scrapy.org/en/latest/topics/settings.html
-#     https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
-#     https://doc.scrapy.org/en/latest/topics/spider-middleware.html
-
-BOT_NAME = 'demo'
-
-SPIDER_MODULES = ['demo.spiders']
-NEWSPIDER_MODULE = 'demo.spiders'
-PARSER_ENGINE_CONFIG_FILE = "parser_engine2.json"
-# SCHEDULER = "scrapy_redis.scheduler.Scheduler"
-# DUPEFILTER_CLASS = "scrapy_redis.dupefilter.RFPDupeFilter"
-# SCHEDULER_QUEUE_CLASS = 'scrapy_redis.queue.SpiderPriorityQueue'
-
-# REDIS_HOST = '127.0.0.1'
-# REDIS_PORT = 6379
-
-MYSQL_HOST = '127.0.0.1'
-MYSQL_USER = 'root'
-MYSQL_PASSWORD = ''
-MYSQL_DATABASE = 'test'
-
-SCHEDULER_PERSIS = True
-
-# Crawl responsibly by identifying yourself (and your website) on the user-agent
-# USER_AGENT = 'demo (+http://www.yourdomain.com)'
-USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:7.0.1) Gecko/20100101 Firefox/7.7'
-# Obey robots.txt rules
-ROBOTSTXT_OBEY = False
-
-# Configure maximum concurrent requests performed by Scrapy (default: 16)
-# CONCURRENT_REQUESTS = 32
-
-# Configure a delay for requests for the same website (default: 0)
-# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
-# See also autothrottle settings and docs
-DOWNLOAD_DELAY = 3
-# The download delay setting will honor only one of:
-CONCURRENT_REQUESTS_PER_DOMAIN = 5
-# CONCURRENT_REQUESTS_PER_IP = 16
-
-# Disable cookies (enabled by default)
-COOKIES_ENABLED = False
-
-# Disable Telnet Console (enabled by default)
-# TELNETCONSOLE_ENABLED = False
-
-# Override the default request headers:
-# DEFAULT_REQUEST_HEADERS = {
-#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-#   'Accept-Language': 'en',
-# }
-
-# Enable or disable spider middlewares
-# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
-# SPIDER_MIDDLEWARES = {
-#    'demo.middlewares.ProcessorSpiderMiddleware': 543,
-# }
-
-# Enable or disable downloader middlewares
-# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
-# DOWNLOADER_MIDDLEWARES = {
-#    'demo.middlewares.ProcessorDownloaderMiddleware': 543,
-# }
-
-# Enable or disable extensions
-# See https://doc.scrapy.org/en/latest/topics/extensions.html
-# EXTENSIONS = {
-#    'scrapy.extensions.telnet.TelnetConsole': None,
-# }
-
-# Configure item pipelines
-# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
-ITEM_PIPELINES = {
-    # 'scrapy_redis.pipelines.RedisPipeline': 300,
-    # 'demo.pipelines.DuplicatesPipeline': 300,
-    # 'demo.pipelines.MongoDBPipeline': 350,
-    'demo.pipelines.MySQLPipeline': 350,
-}
-# MONGO_URI = 'mongodb://localhost:27017'
-# MONGO_DATABASE = 'test'
-
-# Enable and configure the AutoThrottle extension (disabled by default)
-# See https://doc.scrapy.org/en/latest/topics/autothrottle.html
-# AUTOTHROTTLE_ENABLED = True
-# The initial download delay
-# AUTOTHROTTLE_START_DELAY = 5
-# The maximum download delay to be set in case of high latencies
-# AUTOTHROTTLE_MAX_DELAY = 60
-# The average number of requests Scrapy should be sending in parallel to
-# each remote server
-# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
-# Enable showing throttling stats for every response received:
-# AUTOTHROTTLE_DEBUG = False
-
-# Enable and configure HTTP caching (disabled by default)
-# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
-# HTTPCACHE_ENABLED = True
-# HTTPCACHE_EXPIRATION_SECS = 0
-# HTTPCACHE_DIR = 'httpcache'
-# HTTPCACHE_IGNORE_HTTP_CODES = []
-# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 0000000..1072a26
--- /dev/null
+++ b/examples/README.md
@@ -0,0 +1,41 @@
+## Examples of parser-engine
+### demo
+
+>written before v0.1.0
+
+为了验证PE的设计理念，从`http://github.cannot.cc/baixing-helper/`这一GitHub Pages的简单目录页着手，主要测试了`parser_engine.spider.PECrawlSpider`和PE模板配置文件的编写，以及PE对配置文件的加载、执行、输出等。
+
+该项目不需要任何redis、db等依赖，可以直接进入到目录下`scrapy crawl **`运行，观察控制台标准输出即可。
+
+注意，GitHub Pages似乎有轻微的反爬（症状是`连接被拒绝`），需要控制爬取速率。
+
+### huoche
+
+>written after v0.1.0
+
+抓取国内几家货车网站的经销商信息。
+
+PE的大量特性，是在该项目开发过程中遇到问题之后开发的，因此该demo具有较高的参考意义。
+
+`parser_engine.spider.PESpider`及其子类`parser_engine.clue.spider.ClueSpider`，基于`scrapy_redis`进行了二次开发，需要构造一个[TaskRequest](../parser_engine/request.py)对象，经json序列化后扔进某个spider对应的redis队列（通常是redis的list结构）中。
+
+如果对如何构造该demo中所需的`TaskRequest`有兴趣，可以联系 [Danceiny](mailto:danceiny@gmail.com)。这里给出[中国重汽](./huoche/huoche/spiders/zhongguozhongqi.py)的实际例子：
+```python
+import json
+import redis
+r = redis.from_url("redis://127.0.0.1:6379")
+task_reqs = []
+for i in range(34):
+    task_reqs.append({
+        'url': 'http://www.cnhtc.com.cn/View/XiaoShouWangLuoDetail.aspx?sc=5&Category=1&PV=0010%s' % (
+            str(i) if i >= 10 else ('0%d' % i)),
+        'headers': {
+            'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8'
+        }
+    })
+for task_req in task_reqs:
+    r.lpush('huoche:zhongguozhongqi:start_urls', json.dumps(task_req))
+```
+
+
+运行该项目前，除了安装python依赖(`pip install -r requirements.txt`)之外，还需要部署并配置好redis、mysql，相应的连接配置项见[settings.py](./huoche/huoche/settings.py)。
diff --git a/demo/demo/__init__.py b/examples/demo/__init__.py
similarity index 100%
rename from demo/demo/__init__.py
rename to examples/demo/__init__.py
diff --git a/examples/demo/demo/__init__.py b/examples/demo/demo/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/examples/demo/demo/items.py b/examples/demo/demo/items.py
new file mode 100644
index 0000000..0c5347a
--- /dev/null
+++ b/examples/demo/demo/items.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+from scrapy.item import Item, Field
+
+
+class BaseItem(Item):
+    channel_id = Field()
+    channel = Field()
+    created_time = Field()
+
+
+class DemoItem(BaseItem):
+    # define the fields for your item here like:
+    name = Field()
+    text = Field()
+    author = Field()
+    steps = Field()
+
+
+class ClueItem(Item):
+    channel = Field()
+    name = Field()
+    index = Field()
+    url = Field()
+    from_url = Field()
+    status = Field()
+    created_time = Field()
+    finished_time = Field()
+
+
+class LeadsItem(Item):
+    channel_id = Field()
+    channel = Field()
+    name = Field()
+    contact = Field()
+    contact_type = Field()
+    city = Field()
+    category = Field()
+    address = Field()
+    created_time = Field()
+    extra = Field()
diff --git a/examples/demo/demo/pipelines.py b/examples/demo/demo/pipelines.py
new file mode 100644
index 0000000..b57c400
--- /dev/null
+++ b/examples/demo/demo/pipelines.py
@@ -0,0 +1,5 @@
+# -*- coding: utf-8 -*-
+class DemoPipeline(object):
+    def process_item(self, item, spider):
+        print("pipeline receive item, type: ", type(item), item)
+        return item
diff --git a/examples/demo/demo/settings.py b/examples/demo/demo/settings.py
new file mode 100644
index 0000000..6d97708
--- /dev/null
+++ b/examples/demo/demo/settings.py
@@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*-
+BOT_NAME = 'demo'
+SPIDER_MODULES = ['demo.spiders']
+NEWSPIDER_MODULE = 'demo.spiders'
+PARSER_ENGINE_CONFIG_FILE = "parser_engine2.json"
+SCHEDULER_PERSIS = True
+USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:7.0.1) Gecko/20100101 Firefox/7.7'
+ROBOTSTXT_OBEY = False
+DOWNLOAD_DELAY = 3
+# The download delay setting will honor only one of:
+CONCURRENT_REQUESTS_PER_DOMAIN = 1
+COOKIES_ENABLED = False
+ITEM_PIPELINES = {
+    'demo.pipelines.DemoPipeline': 350,
+}
\ No newline at end of file
diff --git a/demo/demo/spiders/__init__.py b/examples/demo/demo/spiders/__init__.py
similarity index 100%
rename from demo/demo/spiders/__init__.py
rename to examples/demo/demo/spiders/__init__.py
diff --git a/demo/demo/spiders/demo_spider.py b/examples/demo/demo/spiders/demo_spider.py
similarity index 80%
rename from demo/demo/spiders/demo_spider.py
rename to examples/demo/demo/spiders/demo_spider.py
index 16e8a10..5e15a5a 100644
--- a/demo/demo/spiders/demo_spider.py
+++ b/examples/demo/demo/spiders/demo_spider.py
@@ -4,9 +4,9 @@
 from scrapy.spiders import CrawlSpider
 
 
-@TemplateAnnotation(tpls=("demo", "json-api-demo"), channel_id="cannot.cc", channel="Danceiny")
+@TemplateAnnotation(tpls=("demo", "dict-api-demo"), channel_id="cannot.cc", channel="Danceiny")
 class DemoSpider(PECrawlSpider):
-    name = "demo"
+    name = "demo1"
 
     start_urls = [
         "http://github.cannot.cc/baixing-helper/"
@@ -44,13 +44,9 @@ class DemoSpider3(PECrawlSpider):
 
     name = "demo3"
     start_urls = [
-        "http://172.31.1.4:30815/api/dict/area/0?childrenDepth=1",
-        # "https://restapi.amap.com/v3/place/text?citylimit=true&output=json&offset=20&city=shanghai&page=1&key=0f1ef779f17ac1f0541bef5452eb7570&keywords=%E6%95%99%E8%82%B2"
+        "https://restapi.amap.com/v3/place/text?citylimit=true&output=json&offset=20&city=shanghai&page=1&key=0f1ef779f17ac1f0541bef5452eb7570&keywords=%E6%95%99%E8%82%B2"
     ]
 
-    def callback(self, data):
-        print("准备持久化", data)
-
     def process_results(self, response, results):
         print("处理结果", results)
         return results
diff --git a/demo/demo/spiders/gaode_spider.py b/examples/demo/demo/spiders/gaode_spider.py
similarity index 60%
rename from demo/demo/spiders/gaode_spider.py
rename to examples/demo/demo/spiders/gaode_spider.py
index b0aa9ae..9d42d81 100644
--- a/demo/demo/spiders/gaode_spider.py
+++ b/examples/demo/demo/spiders/gaode_spider.py
@@ -10,28 +10,11 @@ class GaodeSpider(CrawlSpider):
     name = "gaode"
 
     def generate_urls(self):
-        # keywords = getattr(self, 'keywords', None)
-        # if keywords is None:
         keywords = "教育|培训"
         key = '0f1ef779f17ac1f0541bef5452eb7570'
         total = 2
         adcodes = [
             310101,  # 黄浦区
-            # 310104,#徐汇区
-            # 310105,#长宁区
-            # 310106,#静安区
-            # 310107,#普陀区
-            # 310109,#虹口区
-            # 310110,#杨浦区
-            # 310115,#浦东新区
-            # 310112,#闵行区
-            # 310113,#宝山区
-            # 310114,#嘉定区
-            # 310116,#金山区
-            # 310117,#松江区
-            # 310118,#青浦区
-            # 310120,#奉贤区
-            # 310151,#崇明区
         ]
         urls = []
         for adcode in adcodes:
@@ -42,7 +25,7 @@ def generate_urls(self):
         return urls
 
     def parse(self, response):
-        items = self._parse(response)
+        items = self._parse_start_url(response)
         if items:
             for item in items:
                 yield item
diff --git a/demo/parser_engine2.json b/examples/demo/parser_engine2.json
similarity index 98%
rename from demo/parser_engine2.json
rename to examples/demo/parser_engine2.json
index d6a1281..8e4b742 100644
--- a/demo/parser_engine2.json
+++ b/examples/demo/parser_engine2.json
@@ -6,7 +6,7 @@
       "fields": [
         {
           "dom_id": null,
-          "_css": null,
+          "css": null,
           "xpath": null,
           "tags": [
             "h3"
diff --git a/demo/scrapy.cfg b/examples/demo/scrapy.cfg
similarity index 88%
rename from demo/scrapy.cfg
rename to examples/demo/scrapy.cfg
index 0dd4859..306e2a1 100644
--- a/demo/scrapy.cfg
+++ b/examples/demo/scrapy.cfg
@@ -7,7 +7,6 @@
 default = demo.settings
 
 [deploy]
-;url = http://172.31.1.4:30217/
 url = http://127.0.0.1:6800/
 
 project = demo
diff --git a/examples/huoche/huoche/__init__.py b/examples/huoche/huoche/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/examples/huoche/huoche/items.py b/examples/huoche/huoche/items.py
new file mode 100644
index 0000000..f316e8c
--- /dev/null
+++ b/examples/huoche/huoche/items.py
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+
+# Define here the models for your scraped items
+#
+# See documentation in:
+# https://doc.scrapy.org/en/latest/topics/items.html
+
+from scrapy.item import Item, Field
+from peewee import Model, PrimaryKeyField, CharField, IntegerField
+from parser_engine.config import mysqldb
+
+
+class HuocheDealerItem(Item):  # scrapy Item holding one scraped truck-dealer record
+    # channel + dealer_id together form the unique id of the dealer
+    channel = Field()
+    dealer_id = Field()
+    leads_src = Field()  # lead source channel
+    url = Field()  # website URL
+    company_type = Field()  # company type
+    leads_name = Field()  # lead name: company name or service-station name
+    area = Field()  # region
+    province = Field()  # province
+    city = Field()  # city
+    address = Field()  # address
+    phone = Field()  # phone
+    service_phone = Field()  # 24-hour service phone
+    wechat = Field()  # WeChat
+    linkman = Field()  # contact person
+    main_model = Field()  # main models sold
+    online_source = Field()  # online vehicle listings
+    business_scope = Field()  # business scope
+    brand = Field()  # brand
+    tags = Field()  # tags
+
+    crawled_time = Field()  # presumably the crawl timestamp (the DB model stores it as an integer) - confirm
+
+
+class HuocheDealerModel(Model):  # peewee model backing the 'huoche_dealer' table
+    id = PrimaryKeyField()
+    dealer_id = CharField(default='', max_length=32)  # id within that channel
+    channel = CharField(default='', max_length=16)  # channel is the English form of leads_src
+    leads_src = CharField(default='', max_length=16)  # lead source channel
+    phone = CharField(default='', max_length=64)  # phone
+    wechat = CharField(default='', max_length=32)  # WeChat
+    url = CharField(default='', max_length=64)  # website URL
+    brand = CharField(default='', max_length=16)  # brand
+    tags = CharField(default='', max_length=64)  # tags
+    company_type = CharField(default='', max_length=16)  # company type
+    leads_name = CharField(default='', max_length=64)  # lead name: company name or service-station name
+    area = CharField(default='', max_length=16)  # region
+    province = CharField(default='', max_length=16)  # province
+    city = CharField(default='', max_length=16)  # city
+    address = CharField(default='', max_length=64)  # address
+    service_phone = CharField(default='', max_length=64)  # 24-hour service phone
+    linkman = CharField(default='', max_length=64)  # contact person
+    main_model = CharField(default='', max_length=64)  # main models sold
+    online_source = CharField(default='', max_length=64)  # online vehicle listings
+    business_scope = CharField(default='', max_length=64)  # business scope
+
+    crawled_time = IntegerField(default=0)  # presumably a unix timestamp - confirm against the writer
+
+    class Meta:
+        database = mysqldb
+        table_name = 'huoche_dealer'
diff --git a/examples/huoche/huoche/logger.py b/examples/huoche/huoche/logger.py
new file mode 100644
index 0000000..de050af
--- /dev/null
+++ b/examples/huoche/huoche/logger.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+import json
+from parser_engine.patch import get_redis
+from parser_engine.singleton import Singleton
+
+
+@Singleton
+class DwLogger:  # writes data-warehouse ("dw") log records; wrapped by the project's Singleton decorator
+    def __init__(self, write_filename='dw_local.txt'):
+        from scrapy.utils import project  # imported lazily, at construction time
+        settings = project.get_project_settings()
+        self.r = get_redis(**settings.getdict('REDIS_PARAMS'))  # not used by the methods visible here
+        self.ENV = settings.get('ENV')
+        if write_filename:
+            self.f = open(write_filename, 'a+')  # append mode; closed in __del__
+        else:
+            self.f = None  # a falsy filename disables the local file sink
+
+    def __del__(self):
+        if self.f:
+            self.f.close()
+
+    def log_to_dw(self, action, **data):  # NOTE(review): `action` is accepted but never used below
+        if self.ENV == 'local':
+            if self.f:
+                self.f.write(json.dumps(data) + '\n')  # one JSON object per line
+            return
+
+        # Only the dev environment is meant to send data to dw (not implemented yet).
+        pass
diff --git a/examples/huoche/huoche/pipelines.py b/examples/huoche/huoche/pipelines.py
new file mode 100644
index 0000000..8dec984
--- /dev/null
+++ b/examples/huoche/huoche/pipelines.py
@@ -0,0 +1,50 @@
+# -*- coding: utf-8 -*-
+from cpca import transform
+from playhouse.shortcuts import dict_to_model
+from .items import HuocheDealerItem, HuocheDealerModel
+from .logger import DwLogger
+
+
+class HuocheDealerItemPipeline(object):
+    """Fill in a missing province/city by parsing the address with cpca and join
+    list-valued tags into a comma-separated string. Address segmentation library:
+    https://github.com/DQinYuan/chinese_province_city_area_mapper"""
+
+    def process_item(self, item, spider):
+        if isinstance(item, HuocheDealerItem):
+            if item.get('address') and (not item.get('province') or not item.get('city')):  # .get(): city may be unset
+                dataframe = transform([item['address']])
+                item['province'] = dataframe['省'].values[0]
+                item['city'] = dataframe['市'].values[0]
+            if item.get('tags') and isinstance(item['tags'], list):
+                item['tags'] = ','.join(item['tags'])
+        return item
+
+
+class HuocheDealerDwPipeline(object):  # forwards every HuocheDealerItem to DwLogger
+
+    def __init__(self):
+        self.logger = DwLogger()
+
+    def process_item(self, item, spider):
+        if isinstance(item, HuocheDealerItem):
+            self.logger.log_to_dw("huoche_dealer", **item)  # item fields become keyword arguments
+        return item
+
+
+class HuocheDealerMySQLPipeline(object):  # updates or inserts one huoche_dealer row per HuocheDealerItem
+    def process_item(self, item, spider):
+        if isinstance(item, HuocheDealerItem):
+            if not HuocheDealerModel.table_exists():  # checked for every item; creates the table on first use
+                HuocheDealerModel.create_table()
+            try:
+                model = HuocheDealerModel.get_or_none(dealer_id=item.get('dealer_id'), channel=item['channel'])  # look up by (dealer_id, channel)
+                if model:
+                    HuocheDealerModel.update(**item).where(HuocheDealerModel.id == model.id).execute()  # existing row: overwrite with item fields
+                else:
+                    model = dict_to_model(HuocheDealerModel, item, True)  # True = ignore_unknown
+                    model.save()
+            except Exception as e:
+                spider.error("huoche_dealer MySQL pipeline failed, exception: %s" % str(e))  # NOTE(review): assumes the spider defines error(); scrapy.Spider does not - confirm
+                print(item)
+        return item
diff --git a/examples/huoche/huoche/settings.py b/examples/huoche/huoche/settings.py
new file mode 100644
index 0000000..fc1306f
--- /dev/null
+++ b/examples/huoche/huoche/settings.py
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+
+BOT_NAME = 'huoche'
+
+SPIDER_MODULES = ['huoche.spiders']
+NEWSPIDER_MODULE = 'huoche.spiders'
+
+SCHEDULER = "scrapy_redis.scheduler.Scheduler"
+DUPEFILTER_CLASS = "scrapy_redis.dupefilter.RFPDupeFilter"
+
+REDIS_PARAMS = {
+    "url": "redis://127.0.0.1:6379"
+}
+MYSQL_HOST = '127.0.0.1'
+MYSQL_PORT = 3306
+MYSQL_USER = 'root'
+MYSQL_PASSWORD = ''
+MYSQL_DATABASE = 'crawler'
+ENV = 'local'
+
+SCHEDULER_PERSIST = True  # scrapy_redis reads SCHEDULER_PERSIST; the former SCHEDULER_PERSIS spelling was ignored
+SCHEDULER_QUEUE_CLASS = 'scrapy_redis.queue.SpiderPriorityQueue'
+REDIS_START_URLS_KEY = BOT_NAME + ":" + '%(name)s:start_urls'  # '<project>:<spider>:start_urls'
+
+PARSER_ENGINE_CONFIG_FILE = "templates/gaode_pe.json"
+ROBOTSTXT_OBEY = False
+
+DOWNLOADER_MIDDLEWARES = {
+    "scrapy.downloadermiddlewares.retry.RetryMiddleware": 543
+}
+RETRY_TIMES = 1
+RETRY_HTTP_CODES = []
+ITEM_PIPELINES = {
+    'huoche.pipelines.HuocheDealerMySQLPipeline': 339,
+    'huoche.pipelines.HuocheDealerDwPipeline': 340,
+    'parser_engine.clue.pipelines.CluePersistentPipeline': 341,
+    'parser_engine.clue.pipelines.CluePipeline': 342,
+}
\ No newline at end of file
diff --git a/examples/huoche/huoche/spiders/__init__.py b/examples/huoche/huoche/spiders/__init__.py
new file mode 100644
index 0000000..ebd689a
--- /dev/null
+++ b/examples/huoche/huoche/spiders/__init__.py
@@ -0,0 +1,4 @@
+# This package will contain the spiders of your Scrapy project
+#
+# Please refer to the documentation for information on how to create and manage
+# your spiders.
diff --git a/examples/huoche/huoche/spiders/dongfeng_spider.py b/examples/huoche/huoche/spiders/dongfeng_spider.py
new file mode 100644
index 0000000..f663adf
--- /dev/null
+++ b/examples/huoche/huoche/spiders/dongfeng_spider.py
@@ -0,0 +1,45 @@
+from parser_engine import TemplateAnnotation
+from parser_engine.clue.spider import ClueSpider
+
+
+@TemplateAnnotation(start_url_tpl=({
+                        "name": "dongfeng_pe",
+                        "parent": {
+                            "xpath": "//li"
+                        },
+                        "itemname": "HuocheDealerItem",
+                        "fields": [
+                            {
+                                "key": "dealer_id",
+                                "xpath": "@data-id",
+                                "value_type": "singleton"
+                            },
+                            {
+                                "key": "leads_name",
+                                "xpath": "div[contains(@class,'data-Title')]/text()",
+                                "value_type": "singleton"
+                            },
+                            {
+                                "key": "address",
+                                "xpath": "p/span[@class='data-Address']/text()",
+                                "value_type": "singleton"
+                            },
+                            {
+                                "key": "phone",
+                                "xpath": "p/span[@class='data-Tel']/text()",
+                                "value_type": "singleton"
+                            },
+                            {
+                                "key": "brand",
+                                "xpath": "p/span[@class='data-Main']/text()",
+                                "value_type": "singleton"
+                            },
+                        ]},), channel='dongfeng', leads_src='东风')
+class DongfengSpider(ClueSpider):
+    name = 'dongfeng'
+
+    def parse(self, response):  # generator callback: yields every parsed item, then reports the count
+        items = self._parse_start_url(response)  # presumably applies the "dongfeng_pe" start_url_tpl template - confirm in TemplateAnnotation
+        for item in items:
+            yield item
+        self.finish_clue(response, len(items))  # runs only once the consumer has drained all items
\ No newline at end of file
diff --git a/examples/huoche/huoche/spiders/futian_spider.py b/examples/huoche/huoche/spiders/futian_spider.py
new file mode 100644
index 0000000..e058d8a
--- /dev/null
+++ b/examples/huoche/huoche/spiders/futian_spider.py
@@ -0,0 +1,21 @@
+from parser_engine import TemplateAnnotation
+from parser_engine.clue.spider import ClueSpider
+
+
+@TemplateAnnotation(start_url_tpl=({
+                                       "name": "futian_pe",
+                                       "itemname": "HuocheDealerItem",
+                                       "extract_keys_map": {
+                                           "id": "dealer_id",
+                                           "dealerName": "leads_name",
+                                           "dealerAddress": "address",
+                                           "dealerTel": "phone"
+                                       }},), channel='futian', leads_src='福田汽车')
+class FutianSpider(ClueSpider):
+    name = 'futian'
+
+    def parse(self, response):  # generator callback: yields every parsed item, then reports the count
+        items = self._parse_start_url(response)  # presumably applies the "futian_pe" start_url_tpl template - confirm in TemplateAnnotation
+        for item in items:
+            yield item
+        self.finish_clue(response, len(items))  # runs only once the consumer has drained all items
diff --git a/examples/huoche/huoche/spiders/kachezhijia_spider.py b/examples/huoche/huoche/spiders/kachezhijia_spider.py
new file mode 100644
index 0000000..e69655c
--- /dev/null
+++ b/examples/huoche/huoche/spiders/kachezhijia_spider.py
@@ -0,0 +1,95 @@
+from parser_engine import TemplateAnnotation
+from parser_engine.clue.spider import ClueSpider
+from parser_engine.clue.items import ClueItem
+import re
+
+
+@TemplateAnnotation(start_url_tpl=({
+                                       "name": "kachezhijia_listing",
+                                       "parent": {
+                                           "xpath": "//ul[@class=\"dealers\"]/li/div[@class=\"detail\"]"
+                                       },
+                                       "itemname": "HuocheDealerItem",
+                                       "fields": [
+                                           {
+                                               "key": "dealer_id",
+                                               "xpath": "p[@class=\"contact\"]/a/@href",
+                                               "regexp": "360che.com/(\\d+)/",
+                                               "value_type": "singleton"
+                                           },
+                                           {
+                                               "key": "url",
+                                               "xpath": "p[@class=\"contact\"]/a/@href",
+                                               "value_type": "singleton"
+                                           },
+                                           {
+                                               "key": "leads_name",
+                                               "xpath": "h2/a[@href]/text()",
+                                               "value_type": "singleton"
+                                           },
+                                           {
+                                               "key": "tags",
+                                               "xpath": "span[@class=\"inline-block\"]/text()"
+                                           },
+                                           {
+                                               "key": "address",
+                                               "xpath": "p[last()-1]/text()",
+                                               "regexp": "地址 : (\\w+)",
+                                               "value_type": "stripped_string"
+                                           },
+                                           {
+                                               "key": "brand",
+                                               "xpath": "p[@class=\"contact\"]/a[@href]/text()",
+                                               "value_type": "singleton"
+                                           },
+                                           {
+                                               "key": "phone",
+                                               "xpath": "p[@class=\"contact\"]/span[@class=\"tel\"]/text()",
+                                               "value_type": "singleton"
+                                           }
+                                       ]
+                                   }, {
+                                       "name": "kachezhijia_pageinfo",
+                                       "fields": [{
+                                           "key": "totalPage",
+                                           "xpath": "//ul[@class=\"page-list\"]/li[last()-1]//a[@href]/text()",
+                                           "value_type": "int"
+                                       }, {
+                                           "key": "totalCount",
+                                           "xpath": "//ul[@id=\"site-list\"]/li[1]/a[@href]/text()",
+                                           "regexp": "不限 \\((\\d+)\\)",
+                                           "value_type": "int"
+                                       }
+                                       ]
+                                   }), channel='kachezhijia', leads_src='卡车之家')
+class CachezhijiaSpider(ClueSpider):
+    name = 'kachezhijia'
+
+    def parse(self, response):  # yields one ClueItem per other listing page (when open_pages is set), then the dealer items
+        from_url = response.request.url
+        from_clue_id = response.meta.get('clue_id')
+        # pagination: fan out one clue per remaining listing page
+        if response.meta.get('open_pages', False):
+            data = self._parse_start_url(response, 1)  # 1 = second start_url_tpl entry ("kachezhijia_pageinfo")
+            try:
+                total_count = data[0]['totalCount']
+                total_page = data[0]['totalPage']
+                self.info("卡车之家今日共计%d个HuocheDealer" % total_count)
+            except (KeyError, IndexError) as e:
+                self.error("get kachezhijia page totalCount error: %s data: %s, request.body: %s"
+                           % (str(e), data, response.request.body))
+            else:
+                response.request.meta['open_pages'] = 0  # cleared before the request is copied into follow-up tasks
+                current_page = int(re.findall('0_c(\\d+)', from_url)[0])
+                for i in range(0, total_page + 1):  # NOTE(review): starts at page 0 - confirm the site serves a c0 page
+                    if i == current_page:
+                        continue
+                    task = self.request_to_task(response.request)
+                    task.url = re.sub('0_c\\d+', '0_c%d' % i, task.url)  # swap the whole page number, not just one digit after any 'c'
+                    yield ClueItem(
+                        {"url": task.url, "req": task, "project": self.project, "spider": self.name,
+                         "from_clue_id": from_clue_id, })
+        items = self._parse_start_url(response)
+        for item in items:
+            yield item
+        self.finish_clue(response, len(items))
diff --git a/examples/huoche/huoche/spiders/sxqc_spider.py b/examples/huoche/huoche/spiders/sxqc_spider.py
new file mode 100644
index 0000000..dcc908a
--- /dev/null
+++ b/examples/huoche/huoche/spiders/sxqc_spider.py
@@ -0,0 +1,31 @@
+from parser_engine import TemplateAnnotation
+from parser_engine.clue.spider import ClueSpider
+from ..items import HuocheDealerItem
+import json
+
+
+@TemplateAnnotation(start_url_tpl=({
+                        "name": "sxqc_pe",
+                        "itemname": "HuocheDealerItem",
+                        "extract_keys_map": {
+                            "title": "leads_name",
+                            "address": "address",
+                            "phone": "phone"
+                        }},), channel='', leads_src='')  # NOTE(review): parse() below builds items itself and never calls _parse_start_url
+class FutianSpider(ClueSpider):  # NOTE(review): same class name as the spider in futian_spider.py - looks like a copy-paste
+    name = 'sxqc'
+
+    def parse(self, response):  # generator callback: one HuocheDealerItem per decoded entry
+        body = '[' + bytes.decode(response.body) + ']'  # wrap the payload in brackets so it loads as a JSON array
+        body = body.replace("'", '"')  # single -> double quotes; NOTE(review): also rewrites apostrophes inside values
+        data = json.loads(body)
+        for v in data:
+            item = HuocheDealerItem(
+                leads_name=v['title'],
+                address=v['address'],
+                phone=v['phone'],
+                channel="sxqc",
+                leads_src="陕西重卡"
+            )
+            yield item
+        self.finish_clue(response, len(data))  # reports the number of decoded entries
diff --git a/examples/huoche/huoche/spiders/yiqijiefang_spider.py b/examples/huoche/huoche/spiders/yiqijiefang_spider.py
new file mode 100644
index 0000000..a47774a
--- /dev/null
+++ b/examples/huoche/huoche/spiders/yiqijiefang_spider.py
@@ -0,0 +1,50 @@
+from parser_engine import TemplateAnnotation
+from parser_engine.clue.spider import ClueSpider
+from six.moves.urllib.parse import parse_qsl
+
+
+@TemplateAnnotation(start_url_tpl=({
+                                       "name": "yiqijiefang_pe",
+                                       "parent": {
+                                           "xpath": "//table[@class='list_1']/tr"
+                                       },
+                                       "itemname": "HuocheDealerItem",
+                                       "fields": [
+                                           {
+                                               "key": "city",
+                                               "xpath": "td[@class='city']/text()",
+                                               "value_type": "singleton"
+                                           },
+                                           {
+                                               "key": "leads_name",
+                                               "xpath": "td[@class='fwz_name']/text()",
+                                               "value_type": "singleton"
+                                           },
+                                           {
+                                               "key": "address",
+                                               "xpath": "td[@class='address']/text()",
+                                               "value_type": "singleton"
+                                           },
+                                           {
+                                               "key": "phone",
+                                               "xpath": "td[@class='phone']/text()",
+                                               "value_type": "singleton"
+                                           },
+                                           {
+                                               "key": "service_phone",
+                                               "xpath": "td[@class='bei1']/text()",
+                                               "value_type": "singleton"
+                                           },
+                                       ]},), channel='jiefang', leads_src='解放')
+class YiqijiefangSpider(ClueSpider):
+    name = 'yiqijiefang'
+
+    def parse(self, response):  # yields the parsed items, each stamped with the 'province' form field of the request
+        dealers = self._parse_start_url(response)
+        form_body = response.request.body.decode("utf-8")  # request body is a urlencoded form
+        form_fields = dict(parse_qsl(form_body))
+        province_name = form_fields.get('province')  # None when the form carried no 'province'
+        for dealer in dealers:
+            dealer['province'] = province_name
+            yield dealer
+        self.finish_clue(response, len(dealers))
diff --git a/examples/huoche/huoche/spiders/youka.py b/examples/huoche/huoche/spiders/youka.py
new file mode 100644
index 0000000..26a04b5
--- /dev/null
+++ b/examples/huoche/huoche/spiders/youka.py
@@ -0,0 +1,118 @@
+from parser_engine.clue.spider import ClueSpider
+from parser_engine import TemplateAnnotation
+from parser_engine.clue.items import ClueItem
+from parser_engine.request import TaskRequest
+from scrapy import Request
+
+
+@TemplateAnnotation(start_url_tpl=({
+                                       "name": "youka_shop_listing_api",
+                                       "parent": {
+                                           "json_key": "data",
+                                       },
+                                       "fields": [{
+                                           "key": "totalPage",
+                                           "json_key": "totalPage",
+
+                                       }, {
+                                           "key": "ids",
+                                           "json_path": "dataList[*].id"
+                                       }]
+                                   },),
+    tpls=({
+        "name": "youka_shop_detail_api",
+        "itemname": "HuocheDealerItem",
+        "parent": {
+            "json_key": "data",
+        },
+        "fields": [{
+            "key": "company_type",
+            "json_key": "category",
+            "mapper": {
+                1: "二手车直营店",
+                2: "4S店"
+            }
+        }, {
+            "key": "dealer_id",
+            "json_key": "id",
+            "required": 1,
+        }, {
+            "key": "leads_name",
+            "json_key": "shopName",
+        }, {
+            "key": "area",
+            "json_path": "districtDto.districtName",
+            "value_type": "singleton"
+        }, {
+            "key": "province",
+            "json_path": "provinceDto.provinceName",
+            "value_type": "singleton"
+        }, {
+            "key": "city",
+            "json_path": "cityDto.cityName",
+            "value_type": "singleton"
+        }, {
+            "key": "address",
+            "json_key": "wholeAddress",
+        }, {
+            "key": "phone",
+            "json_key": "mobile",
+        }, {
+            "key": "service_phone",
+            "default_value": "",
+        }, {
+            "key": "wechat",
+            "json_key": "wechat",
+        }, {
+            "key": "linkman",
+            "json_key": "contactName"
+        }, {
+            "key": "tags",
+            "json_key": "tags",
+            "join": ","
+        }, {
+            "key": "brand",
+            "json_key": "brandList",
+            "join": ","
+        }, {
+            "key": "business_scope",
+            "json_key": "scope"
+        }]
+    },), channel='youka', leads_src='优卡')  # trailing comma: tpls is a 1-tuple like start_url_tpl, not a bare dict
+class YoukaSpider(ClueSpider):
+    name = 'youka'
+    custom_settings = {
+        'CONCURRENT_REQUESTS': 2,
+        'CONCURRENT_REQUESTS_PER_DOMAIN': 1
+    }
+
+    # used-truck direct store: "category": 1,
+    # 4S dealership: "category": 2,
+    def parse(self, response):  # yields ClueItems for the other listing pages, then one detail Request per shop id
+        items = self._parse_start_url(response)
+        meta = response.meta
+        clue_id = meta.get('clue_id')
+        from_url = response.request.url
+        if meta.get('open_pages'):
+            total_page = items[0]['totalPage']
+            import re
+            current_page = int(re.findall('page=(\\d+)', from_url)[0])
+            for i in range(1, total_page + 1):
+                if current_page == i:
+                    continue
+                url = "http://www.china2cv.com/truck-foton-web/api/shop/v1/getShopList?page=%d&pageSize=10" % i
+                yield ClueItem({"project": "huoche", "spider": self.name, "req": TaskRequest(
+                    url=url,
+                    meta={"from_clue_id": clue_id}
+                )})
+        for item in items:
+            for shop_id in item['ids']:  # renamed from `id`, which shadowed the builtin
+                r = Request(url="http://www.china2cv.com/truck-foton-web/api/shop/v1/getShopInfo?shopId=%d" % int(shop_id),
+                            callback=self._response_downloaded)
+                r.meta.update(rule=0, from_clue_id=clue_id)  # rule=0: presumably index of the first (only) tpls entry - confirm
+                yield r
+
+    def process_results(self, response, results):  # adds the public detail-page url to every parsed item
+        for item in results:
+            item['url'] = 'http://www.china2cv.com/storeDetail.html?typess=1&shopId=' + str(item['dealer_id'])
+        return results
diff --git a/examples/huoche/huoche/spiders/zhongguozhongqi.py b/examples/huoche/huoche/spiders/zhongguozhongqi.py
new file mode 100644
index 0000000..6f7c96a
--- /dev/null
+++ b/examples/huoche/huoche/spiders/zhongguozhongqi.py
@@ -0,0 +1,44 @@
+from parser_engine import TemplateAnnotation
+from parser_engine.clue.spider import ClueSpider
+
+
+@TemplateAnnotation(start_url_tpl=({
+    "name": "zhongguozhongqi_xiaoshouwangluo",
+    "itemname": "HuocheDealerItem",
+    "parent": {
+        "xpath": "//tr[@class=\"bgcolor2\"]"
+    },
+    "fields": [
+        {
+            "key": "area",
+            "xpath": "td[1]/text()",
+            "value_type": "stripped_string"
+        }, {
+            "key": "leads_name",
+            "xpath": "td[2]/text()",
+            "value_type": "stripped_string"
+        }, {
+            "key": "address",
+            "xpath": "td[3]/text()",
+            "value_type": "stripped_string"
+        }, {
+            "key": "linkman",
+            "xpath": "td[4]/text()",
+            "value_type": "stripped_string"
+        }, {
+            "key": "phone",
+            "xpath": "td[5]/text()",
+            "value_type": "stripped_string"
+        }
+    ]
+},), channel='zhongguozhongqi', leads_src='中国重汽')  # trailing comma: 1-tuple, matching the other spiders' start_url_tpl
+class ZhongguozhongqiSpider(ClueSpider):
+    name = 'zhongguozhongqi'
+    def parse(self, response):  # yields the parsed items with the phone separators normalized
+        items = self._parse_start_url(response)
+        for item in items:
+            phone = item.get('phone')
+            if phone:
+                item['phone'] = phone.replace('、', ',')  # ideographic comma -> ASCII comma between numbers
+            yield item
+        self.finish_clue(response, len(items))
diff --git a/examples/huoche/requirements.txt b/examples/huoche/requirements.txt
new file mode 100644
index 0000000..7beb861
--- /dev/null
+++ b/examples/huoche/requirements.txt
@@ -0,0 +1,5 @@
+scrapy_redis
+scrapy
+parser_engine
+cpca
+peewee
\ No newline at end of file
diff --git a/parser_engine/clue/pipelines.py b/parser_engine/clue/pipelines.py
index 66c7a7b..ce26ba9 100644
--- a/parser_engine/clue/pipelines.py
+++ b/parser_engine/clue/pipelines.py
@@ -12,14 +12,17 @@ def process_item(self, item, spider):
             model = ClueModel.from_item(item)
             model.save()
             item['req'].meta['clue_id'] = model.id
-            spider.info('CluePersistentPipeline save clue [clue_id] %s to database' % item['req'].meta.get('clue_id'))
+            spider.info('CluePersistentPipeline save clue {clue_id} to database'
+                        .format(clue_id=item['req'].meta.get('clue_id')))
         return item
 
 
 # route clue to queue
 class CluePipeline(object):
+
     def process_item(self, item, spider):
         if isinstance(item, ClueItem):
-            spider.info('CluePipeline route clue [clue_id] %s to queue' % item['req'].meta.get('clue_id'))
+            clue_id = item['req'].meta.get('clue_id')
+            spider.info('CluePipeline route clue {clue_id} to queue'.format(clue_id=clue_id))
             spider.route('%s:%s:start_urls' % (item['project'], item['spider']), item['req'])
         return item
diff --git a/parser_engine/decorator.py b/parser_engine/decorator.py
index cc92e99..fc154f4 100644
--- a/parser_engine/decorator.py
+++ b/parser_engine/decorator.py
@@ -4,7 +4,7 @@
 from scrapy.linkextractors import LinkExtractor
 
 from .template import PETemplate
-from .parser import parse_with_tpl
+from .parser import parse_with_tpl, PEParser
 from .utils import is_sequence, is_string
 from .singleton import Singleton
 from .config import init_config, get_config_data
@@ -19,13 +19,19 @@ def get_method(method):
             return getattr(self, method, None)
 
     self._rules = [copy.copy(r) for r in self.rules]
+    global c
     for rule in self._rules:
         # diff start
+        # support PECrawlSpider: use template driven callback processor
+        if getattr(rule, "template", None):
+            rule.parser = get_method(PEParser(rule.template))
+            continue
         tpl_id = rule.cb_kwargs.pop('tpl_id', None)
         if tpl_id:
             tpl = PETemplate.from_json(find_by_id(tpl_id))
             rule.link_extractor = tpl.get_link_extractor()
             rule.callback = parse_with_tpl
+            rule.cb_kwargs['tpl'] = tpl
         # diff end
         rule.process_links = get_method(rule.process_links)
         rule.process_request = get_method(rule.process_request)
@@ -212,18 +218,8 @@ def _parse_start_url(self, response, tpl_index_or_id=None):
                     # do patch
                     spcls._compile_rules = _compile_rules_patch
             else:
-                pass
                 # FIXME: scrapy.Spider && scrapy_redis.spiders.RedisSpider case
-                # def parse_response_patch(self, response):
-                #     return self.start_rule.callback(response)
-                #
-                # spcls.parse_response = classmethod(parse_response_patch)
-                #
-                # def start_requests(self):
-                #     for url in self.start_urls:
-                #         yield Request(url, callback=spcls.parse_response)
-                #
-                # spcls.start_requests = classmethod(start_requests)
+                pass
             return spcls
 
         return _deco
diff --git a/parser_engine/request.py b/parser_engine/request.py
index 539a1eb..474707b 100644
--- a/parser_engine/request.py
+++ b/parser_engine/request.py
@@ -4,22 +4,6 @@
 from six.moves.urllib.parse import urlencode
 
 
-class JsonRequest(Request):
-
-    def __init__(self, *args, **kwargs):
-        jsondata = kwargs.pop('jsondata', None)
-        if jsondata and kwargs.get('method') is None:
-            kwargs['method'] = 'POST'
-
-        super(JsonRequest, self).__init__(*args, **kwargs)
-
-        if jsondata:
-            data = json.dumps(jsondata) if isinstance(jsondata, dict) else jsondata
-            if self.method == 'POST':
-                self.headers.setdefault(b'Content-Type', b'application/json')
-                self._set_body(data)
-
-
 class TaskRequest(dict):
     def __init__(self, url=None, method='GET', body=None, headers=None, cookies=None, meta=None, **kwargs):
         if headers is None:
@@ -59,6 +43,22 @@ def __getattr__(self, item):
         return self.get(item)
 
 
+class JsonRequest(Request):
+    """Request accepting an optional ``jsondata`` keyword that is sent as the body of a POST."""
+    def __init__(self, *args, **kwargs):
+        jsondata = kwargs.pop('jsondata', None)  # removed from kwargs before they reach Request.__init__
+        if jsondata and kwargs.get('method') is None:  # truthy payload and no explicit method -> POST
+            kwargs['method'] = 'POST'
+
+        super(JsonRequest, self).__init__(*args, **kwargs)
+
+        if jsondata:
+            data = json.dumps(jsondata) if isinstance(jsondata, dict) else jsondata  # non-dict payloads are used as given
+            if self.method == 'POST':  # header and body are only set for POST requests
+                self.headers.setdefault(b'Content-Type', b'application/json')
+                self._set_body(data)
+
+
 def make_request(url, method='GET', formdata=None, jsondata=None, headers=None, **kwargs):
     if formdata:
         return FormRequest(url=url, method=method, formdata=formdata, headers=headers, **kwargs)
diff --git a/parser_engine/spider.py b/parser_engine/spider.py
index 022cd6b..d77d4fe 100644
--- a/parser_engine/spider.py
+++ b/parser_engine/spider.py
@@ -44,12 +44,14 @@ def __str__(self):
 
 
 class PECrawlSpider(CrawlSpider):
+    """
+    start_urls driven
+    """
     # subclass should init rules before call super init
     start_rule = None
 
     def __init__(self, *a, **kw):
         super(PECrawlSpider, self).__init__(*a, **kw)
-        self._compile_rules()
 
     def start_requests(self):
         """
@@ -104,7 +106,7 @@ def _parse_response_v2(self, response, parser, callback, cb_kwargs, follow=True)
         if parser:
             cb_res = parser(response, **cb_kwargs) or ()
             if callback:
-                cb_res = callback(response, **cb_kwargs) or ()
+                cb_res = callback(response, cb_res=cb_res, **cb_kwargs) or ()
             cb_res = self.process_results(response, cb_res)
             for requests_or_item in iterate_spider_output(cb_res):
                 yield requests_or_item
@@ -132,6 +134,9 @@ def get_method(method):
 
 
 class PESpider(RedisCrawlSpider):
+    """
+    redis driven
+    """
 
     def __init__(self, *args, **kwargs):
         super(PESpider, self).__init__(*args, **kwargs)