简述网站规划的主要内容,建设一个怎样的自己的网站,网站建设备案多长时间,网站建设统计表
1. 找开发去掉验证码或者使用万能验证码
2.使用OCR自动识别
使用OCR自动化识别，一般识别率不是太高，处理一般简单验证码还是没问题。
这里使用的是Tesseract-OCR，下载地址：https://github.com/A9T9/Free-Ocr-Windows-Desktop/releases
怎么使…
1. 找开发去掉验证码或者使用万能验证码
2.使用OCR自动识别
使用OCR自动化识别，一般识别率不是太高，处理一般简单验证码还是没问题。
这里使用的是Tesseract-OCR，下载地址：https://github.com/A9T9/Free-Ocr-Windows-Desktop/releases
怎么使用呢？
进入安装后的目录，执行：
tesseract.exe test.png test -1
准备一份网页，上面使用该验证码：
<html>
<head>
<title>Table test by Young</title>
</head>
<body></br>
<h1> Test </h1><img src="http://csujwc.its.csu.edu.cn/sys/ValidateCode.aspx?t=1"></br>
</body>
</html>
要识别验证码，首先得取得验证码。这两款采取对页面元素部分截图的方式：首先获取整个页面的截图，
然后找到页面元素坐标进行截取：

```java
/**
 * This method for screen shot element
 * 
 * @param driver
 * @param element
 * @param path
 * @throws InterruptedException
 */
public static void screenShotForElement(WebDriver driver,
        WebElement element, String path) throws InterruptedException {
    File scrFile = ((TakesScreenshot) driver)
            .getScreenshotAs(OutputType.FILE);
    try {
        Point p = element.getLocation();
        int width = element.getSize().getWidth();
        int height = element.getSize().getHeight();
        Rectangle rect = new Rectangle(width, height);
        BufferedImage img = ImageIO.read(scrFile);
        BufferedImage dest = img.getSubimage(p.getX(), p.getY(),
                rect.width, rect.height);
        ImageIO.write(dest, "png", scrFile);
        Thread.sleep(1000);
        FileUtils.copyFile(scrFile, new File(path));
    } catch (IOException e) {
        e.printStackTrace();
    }
}
```

截取完元素就可以调用Tesseract-OCR生成text：
```java
// use Tesseract to get strings
Runtime rt = Runtime.getRuntime();
rt.exec("cmd.exe /C tesseract.exe D:\\Tesseract-OCR\\test.png D:\\Tesseract-OCR\\test -1 ");
```

接下来通过java读取txt：

```java
/**
 * This method for read TXT file
 * 
 * @param filePath
 */
public static void readTextFile(String filePath) {
    try {
        String encoding = "GBK";
        File file = new File(filePath);
        if (file.isFile() && file.exists()) { // 判断文件是否存在
            InputStreamReader read = new InputStreamReader(
                    new FileInputStream(file), encoding);// 考虑到编码格式
            BufferedReader bufferedReader = new BufferedReader(read);
            String lineTxt = null;
            while ((lineTxt = bufferedReader.readLine()) != null) {
                System.out.println(lineTxt);
            }
            read.close();
        } else {
            System.out.println("找不到指定的文件");
        }
    } catch (Exception e) {
        System.out.println("读取文件内容出错");
        e.printStackTrace();
    }
}
```

整体代码如下：

```java
package com.dbyl.tests;

import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.concurrent.TimeUnit;

import javax.imageio.ImageIO;

import org.apache.commons.io.FileUtils;
import org.openqa.selenium.By;
import org.openqa.selenium.OutputType;
import org.openqa.selenium.Point;
import org.openqa.selenium.TakesScreenshot;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;

import com.dbyl.libarary.utils.DriverFactory;

public class TesseractTest {

    public static void main(String[] args) throws IOException,
            InterruptedException {

        WebDriver driver = DriverFactory.getChromeDriver();
        driver.get("file:///C:/Users/validation.html");
        driver.manage().timeouts().pageLoadTimeout(30, TimeUnit.SECONDS);
        WebElement element = driver.findElement(By.xpath("//img"));

        // take screen shot for element
        screenShotForElement(driver, element, "D:\\Tesseract-OCR\\test.png");

        driver.quit();

        // use Tesseract to get strings
        Runtime rt = Runtime.getRuntime();
        rt.exec("cmd.exe /C tesseract.exe D:\\Tesseract-OCR\\test.png D:\\Tesseract-OCR\\test -1 ");

        Thread.sleep(1000);
        // Read text
        readTextFile("D:\\Tesseract-OCR\\test.txt");
    }

    /**
     * This method for read TXT file
     * 
     * @param filePath
     */
    public static void readTextFile(String filePath) {
        try {
            String encoding = "GBK";
            File file = new File(filePath);
            if (file.isFile() && file.exists()) { // 判断文件是否存在
                InputStreamReader read = new InputStreamReader(
                        new FileInputStream(file), encoding);// 考虑到编码格式
                BufferedReader bufferedReader = new BufferedReader(read);
                String lineTxt = null;
                while ((lineTxt = bufferedReader.readLine()) != null) {
                    System.out.println(lineTxt);
                }
                read.close();
            } else {
                System.out.println("找不到指定的文件");
            }
        } catch (Exception e) {
            System.out.println("读取文件内容出错");
            e.printStackTrace();
        }
    }

    /**
     * This method for screen shot element
     * 
     * @param driver
     * @param element
     * @param path
     * @throws InterruptedException
     */
    public static void screenShotForElement(WebDriver driver,
            WebElement element, String path) throws InterruptedException {
        File scrFile = ((TakesScreenshot) driver)
                .getScreenshotAs(OutputType.FILE);
        try {
            Point p = element.getLocation();
            int width = element.getSize().getWidth();
            int height = element.getSize().getHeight();
            Rectangle rect = new Rectangle(width, height);
            BufferedImage img = ImageIO.read(scrFile);
            BufferedImage dest = img.getSubimage(p.getX(), p.getY(),
                    rect.width, rect.height);
            ImageIO.write(dest, "png", scrFile);
            Thread.sleep(1000);
            FileUtils.copyFile(scrFile, new File(path));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}
```