【WebMagic】webmagic-selenium 找不到config.ini文件

WebMagic原作者对于webmagic-selenium已经有较长时间没有更新了,但是我们又想要用这个来获取渲染的页面数据,该怎么办呢?
鉴于Selenium 已经不再支持 PhantomJS,即使你使用了webmagic-selenium,并且添加了config.ini文件,程序仍然会报错。
有人会说降低Selenium的jar包版本就好,但近来即使降到最低版本也不行了:Selenium已经全部移除了对PhantomJS的依赖,老版本也是如此。
为此,我的建议是下载webmagic-selenium源码,然后修改剔除掉原有代码中对PhantomJS的使用,加入到自己的项目中使用即可。
我主要做了两个文件的改动:

  • 修改WebDriverPool.java
    package com.born2do.webmagic.downloader.selenium;

    import org.openqa.selenium.WebDriver;
    import org.openqa.selenium.chrome.ChromeDriver;
    import org.openqa.selenium.firefox.FirefoxDriver;
    import org.openqa.selenium.remote.DesiredCapabilities;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    import java.io.FileReader;
    import java.io.IOException;
    import java.net.MalformedURLException;
    import java.net.URL;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;
    import java.util.Properties;
    import java.util.concurrent.BlockingDeque;
    import java.util.concurrent.LinkedBlockingDeque;
    import java.util.concurrent.atomic.AtomicInteger;

    /**
     * A bounded pool of Selenium {@link WebDriver} instances backed by Firefox or Chrome.
     * <p>
     * Drivers are created lazily by {@link #get()} until {@code capacity} drivers exist;
     * after that, callers block until a driver is handed back through
     * {@link #returnToPool(WebDriver)}. {@link #closeAll()} quits every driver the pool created.
     *
     * @author code4crafter@gmail.com <br>
     * Date: 13-7-26 <br>
     * Time: 1:41 PM <br>
     */
    class WebDriverPool {
        private final Logger logger = LoggerFactory.getLogger(getClass());

        private final static int DEFAULT_CAPACITY = 5;

        /** Maximum number of drivers this pool will create. */
        private final int capacity;

        private final static int STAT_RUNNING = 1;

        private final static int STAT_CLOSED = 2;

        /** Lifecycle flag: STAT_RUNNING until {@link #closeAll()} switches it to STAT_CLOSED. */
        private final AtomicInteger stat = new AtomicInteger(STAT_RUNNING);

        /** The driver most recently started by {@link #configure()}. */
        private WebDriver mDriver = null;

        private static final String DEFAULT_CONFIG_FILE = "/config.ini";
        private static final String DRIVER_FIREFOX = "firefox";
        private static final String DRIVER_CHROME = "chrome";

        protected static Properties sConfig;
        protected static DesiredCapabilities sCaps;

        /**
         * Loads the configuration file and starts a new Firefox or Chrome driver,
         * storing it in {@code mDriver}.
         * <p>
         * The file named by the {@code selenuim_config} system property is used when that
         * property is set (the property name is kept as-is for compatibility); otherwise
         * {@code /config.ini} is looked up on the classpath. The {@code driver} key selects
         * the browser and defaults to {@code chrome}.
         *
         * @author bob.li.0718@gmail.com
         * @throws IOException if no configuration file can be located or it cannot be read
         * @throws IllegalStateException if the configured {@code driver} value is not supported
         */
        public void configure() throws IOException {
            // Check the explicit override first, so that a missing classpath resource
            // does not fail when the caller has supplied their own file.
            String configFile = System.getProperty("selenuim_config");
            if (configFile == null) {
                URL resource = getClass().getResource(DEFAULT_CONFIG_FILE);
                if (resource == null) {
                    throw new IOException("Cannot find " + DEFAULT_CONFIG_FILE
                            + " on the classpath and system property selenuim_config is not set");
                }
                // NOTE(review): getPath() yields the URL-form path, so this presumably only
                // works while the resource is a plain file on disk (not inside a jar) whose
                // path needs no escaping -- confirm for your deployment.
                configFile = resource.getPath();
            }

            // Read config file; the reader is closed even when load() fails.
            sConfig = new Properties();
            try (FileReader reader = new FileReader(configFile)) {
                sConfig.load(reader);
            }

            // Prepare capabilities
            sCaps = new DesiredCapabilities();
            sCaps.setJavascriptEnabled(true);
            sCaps.setCapability("takesScreenshot", false);

            // Properties keeps trailing whitespace in values, so trim before comparing.
            String driver = sConfig.getProperty("driver", DRIVER_CHROME).trim();

            // Start appropriate Driver
            if (DRIVER_FIREFOX.equals(driver)) {
                mDriver = new FirefoxDriver(sCaps);
            } else if (DRIVER_CHROME.equals(driver)) {
                mDriver = new ChromeDriver(sCaps);
            } else {
                // Fail loudly instead of leaving mDriver null or pointing at a previously
                // created driver, which would then be registered in the pool a second time.
                throw new IllegalStateException("Unsupported driver '" + driver
                        + "' in " + configFile + "; expected '" + DRIVER_FIREFOX
                        + "' or '" + DRIVER_CHROME + "'");
            }
        }

        /**
         * check whether input is a valid URL
         *
         * @author bob.li.0718@gmail.com
         * @param urlString urlString
         * @return true means yes, otherwise no.
         */
        private boolean isUrl(String urlString) {
            try {
                new URL(urlString);
                return true;
            } catch (MalformedURLException mue) {
                return false;
            }
        }

        /**
         * store webDrivers created
         */
        private final List<WebDriver> webDriverList = Collections
                .synchronizedList(new ArrayList<WebDriver>());

        /**
         * store webDrivers available
         */
        private final BlockingDeque<WebDriver> innerQueue = new LinkedBlockingDeque<WebDriver>();

        /**
         * Creates a pool that will start at most {@code capacity} drivers.
         *
         * @param capacity maximum number of drivers to create
         */
        public WebDriverPool(int capacity) {
            this.capacity = capacity;
        }

        /**
         * Creates a pool with the default capacity of 5 drivers.
         */
        public WebDriverPool() {
            this(DEFAULT_CAPACITY);
        }

        /**
         * Obtains a driver from the pool.
         * <p>
         * An idle driver is returned if one is available. Otherwise a new driver is started
         * while fewer than {@code capacity} drivers exist; once the limit is reached the call
         * blocks until a driver is returned to the pool.
         *
         * @return a driver for the caller's exclusive use until it is returned
         * @throws InterruptedException if interrupted while waiting for a driver
         * @throws IllegalStateException if the pool is closed, or if no driver could be
         *         created and the pool holds no driver to wait for
         */
        public WebDriver get() throws InterruptedException {
            checkRunning();
            WebDriver idle = innerQueue.poll();
            if (idle != null) {
                return idle;
            }
            if (webDriverList.size() < capacity) {
                synchronized (webDriverList) {
                    if (webDriverList.size() < capacity) {
                        // add new WebDriver instance into pool
                        try {
                            configure();
                            WebDriver created = mDriver;
                            webDriverList.add(created);
                            // Hand the new driver straight to this caller; putting it on the
                            // shared queue first would let another thread take it.
                            return created;
                        } catch (IOException e) {
                            logger.error("Failed to create a new WebDriver", e);
                            if (webDriverList.isEmpty()) {
                                // Nothing will ever be returned to the queue, so waiting
                                // on it would block forever.
                                throw new IllegalStateException(
                                        "Failed to create a WebDriver and the pool is empty", e);
                            }
                        }
                    }
                }
            }
            return innerQueue.take();
        }

        /**
         * Hands a driver back so that another caller of {@link #get()} can use it.
         *
         * @param webDriver the driver to make available again
         * @throws IllegalStateException if the pool is closed
         */
        public void returnToPool(WebDriver webDriver) {
            checkRunning();
            innerQueue.add(webDriver);
        }

        /**
         * Verifies that the pool has not been closed.
         *
         * @throws IllegalStateException if {@link #closeAll()} has already been called
         */
        protected void checkRunning() {
            if (stat.get() != STAT_RUNNING) {
                throw new IllegalStateException("Already closed!");
            }
        }

        /**
         * Marks the pool as closed and quits every driver it created.
         *
         * @throws IllegalStateException if the pool was already closed
         */
        public void closeAll() {
            boolean closedNow = stat.compareAndSet(STAT_RUNNING, STAT_CLOSED);
            if (!closedNow) {
                throw new IllegalStateException("Already closed!");
            }
            // A synchronized list must be locked by the caller while it is traversed;
            // copy it under the lock so quit() runs without holding it.
            List<WebDriver> snapshot;
            synchronized (webDriverList) {
                snapshot = new ArrayList<WebDriver>(webDriverList);
            }
            for (WebDriver webDriver : snapshot) {
                logger.info("Quit webDriver{}", webDriver);
                webDriver.quit();
            }
        }

    }

  • 在resources目录下新增config.ini文件
    # What WebDriver to use for the tests
    #driver=firefox
    driver=chrome

    #chromedriver驱动程序路径(注意:是chromedriver可执行文件,不是Chrome浏览器本身)
    #chrome_exec_path=D:\chromedriver_win32\chromedriver.exe
    chrome_driver_loglevel=DEBUG