From 1a264d9864d958c72ab596045914b5a11a222968 Mon Sep 17 00:00:00 2001 From: zhangmeng <1334717033@qq.com> Date: Sat, 1 Apr 2023 11:12:18 +0800 Subject: [PATCH] =?UTF-8?q?pdf=20=E8=AF=86=E5=88=AB=202023=E5=B9=B44?= =?UTF-8?q?=E6=9C=881=E6=97=A511:06:23?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 6 + pom.xml | 12 + .../tools/controller/BarCodeController.java | 2 +- .../tools/controller/HomeController.java | 4 + .../tools/controller/PdfOcrController.java | 94 ++++ .../controller/SmallToolsController.java | 26 + .../com/zhangmeng/tools/utils/PlateUtil.java | 466 ++---------------- .../zhangmeng/tools/utils/ResourcesUtils.java | 1 + src/main/resources/fxml/home.fxml | 3 +- src/main/resources/fxml/pdf-ocr.fxml | 18 + src/main/resources/static/redame/img_42.png | Bin 0 -> 82794 bytes 11 files changed, 193 insertions(+), 439 deletions(-) create mode 100644 src/main/java/com/zhangmeng/tools/controller/PdfOcrController.java create mode 100644 src/main/resources/fxml/pdf-ocr.fxml create mode 100644 src/main/resources/static/redame/img_42.png diff --git a/README.md b/README.md index 11c050d..0229288 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,12 @@ ![](./src/main/resources/static/redame/img_41.png) +#### 3.14 pdf内容识别 + +> 开源项目 itext + +![](./src/main/resources/static/redame/img_42.png) + ### 4. 编解码工具 diff --git a/pom.xml b/pom.xml index 52327b1..6e3dc6c 100644 --- a/pom.xml +++ b/pom.xml @@ -344,6 +344,18 @@ tess4j 4.5.2 + + + com.lowagie + itext + 4.2.1 + + + com.github.codemonstur + jpedal + 4.92-p13 + + diff --git a/src/main/java/com/zhangmeng/tools/controller/BarCodeController.java b/src/main/java/com/zhangmeng/tools/controller/BarCodeController.java index d0e859a..d4223f2 100644 --- a/src/main/java/com/zhangmeng/tools/controller/BarCodeController.java +++ b/src/main/java/com/zhangmeng/tools/controller/BarCodeController.java @@ -95,7 +95,7 @@ public class BarCodeController { } public void init_tess4j(File file) { - String result = PlateUtil.zxing_bar_code(file.getPath(), 1, 0); + String result = PlateUtil.zxing_bar_code(file.getPath()); res_view.setText(result); } } diff --git a/src/main/java/com/zhangmeng/tools/controller/HomeController.java b/src/main/java/com/zhangmeng/tools/controller/HomeController.java index 307f344..1a40c0e 100644 --- a/src/main/java/com/zhangmeng/tools/controller/HomeController.java +++ b/src/main/java/com/zhangmeng/tools/controller/HomeController.java @@ -496,4 +496,8 @@ public class HomeController implements Serializable { public void bar_code_menu_item(ActionEvent event) { load_small_tools(12); } + + public void pdf_menu_item(ActionEvent event) { + load_small_tools(13); + } } \ No newline at end of file diff --git a/src/main/java/com/zhangmeng/tools/controller/PdfOcrController.java b/src/main/java/com/zhangmeng/tools/controller/PdfOcrController.java new file mode 100644 index 0000000..c63e9ce --- /dev/null +++ b/src/main/java/com/zhangmeng/tools/controller/PdfOcrController.java @@ -0,0 +1,94 @@ +package com.zhangmeng.tools.controller; + +import com.zhangmeng.tools.utils.AlertUtils; +import com.zhangmeng.tools.utils.ImagePath; +import com.zhangmeng.tools.utils.PlateUtil; +import javafx.application.Platform; +import javafx.beans.property.SimpleObjectProperty; +import javafx.fxml.FXML; +import javafx.scene.control.Button; +import javafx.scene.control.TextArea; +import javafx.scene.control.TextField; +import javafx.scene.image.Image; +import javafx.scene.image.ImageView; +import javafx.stage.FileChooser; +import javafx.stage.Stage; +import lombok.extern.slf4j.Slf4j; + +import java.io.File; + +/** + * @author : 芊芊墨客 + * @version : 1.0 + * @date : 2023-04-01 10:46 + */ +@Slf4j +public class PdfOcrController { + + public static String tessdata = System.getProperty("user.dir"); + + @FXML + public Button file_choose_button; + + @FXML + public TextArea res_view; + + @FXML + public Button cover; + + @FXML + public TextField file_path; + + public static final SimpleObjectProperty choose_file = new SimpleObjectProperty<>(); + + @FXML + public void initialize() { + + file_choose_button.setText(null); + ImageView iv = new ImageView(new Image(ImagePath.path(ImagePath.ImagePathType.IMAGE_FILE))); + iv.setPreserveRatio(true); + iv.setFitWidth(18); + file_choose_button.setGraphic(iv); + + cover.setText("识别"); + + file_choose_button.setOnAction(event -> { + choose_file(); + }); + + cover.setOnAction(event -> { + if (choose_file.getValue() == null) { + AlertUtils.alert_warning("请选择将要识别的pdf再试!"); + return; + } + + File file = choose_file.getValue(); + Stage alert = AlertUtils.alert_loading(cover.getScene().getWindow()); + new Thread(() -> { + Platform.runLater(() -> { + init_pdf(file); + alert.close(); + }); + }).start(); + }); + } + + public void choose_file() { + Stage stage = new Stage(); + FileChooser dc = new FileChooser(); + dc.setTitle("文件选择"); + dc.getExtensionFilters().addAll(new FileChooser.ExtensionFilter("类型", "*.pdf")); + File file = dc.showOpenDialog(stage); + if (file != null) { + String path = file.getAbsolutePath(); + file_path.setText(path); + log.info("file_path:{}", path); + choose_file.set(file); + } + } + + public void init_pdf(File file) { + String result = PlateUtil.getPdfContent(file); + res_view.setText(result); + } +} diff --git a/src/main/java/com/zhangmeng/tools/controller/SmallToolsController.java b/src/main/java/com/zhangmeng/tools/controller/SmallToolsController.java index 1b501d1..7edc153 100644 --- a/src/main/java/com/zhangmeng/tools/controller/SmallToolsController.java +++ b/src/main/java/com/zhangmeng/tools/controller/SmallToolsController.java @@ -80,6 +80,7 @@ public class SmallToolsController { private AnchorPane maven_install_jar; private AnchorPane word_ocr; private AnchorPane bar_code; + private AnchorPane pdf_ocr; @FXML private ListView listView; @@ -385,6 +386,13 @@ public class SmallToolsController { } bar_code(flag); } + + if (newValue.getIndex() == 13) { + if (pdf_ocr != null) { + flag = true; + } + pdf_ocr(flag); + } } }); } @@ -407,6 +415,7 @@ public class SmallToolsController { case Maven_Install_Jar -> new Image(ImagePath.path(ImagePath.ImagePathType.Qr_CODE)); case Word_ocr -> new Image(ImagePath.path(ImagePath.ImagePathType.Qr_CODE)); case Bar_Code -> new Image(ImagePath.path(ImagePath.ImagePathType.Qr_CODE)); + case Pdf_Ocr -> new Image(ImagePath.path(ImagePath.ImagePathType.Qr_CODE)); }; } @@ -677,6 +686,23 @@ public class SmallToolsController { common_method(); } + public void pdf_ocr(boolean flag) { + //默认选择第一个 + listView.getSelectionModel().select(13); + + if (!flag) { + try { + root = FXMLLoader.load(ResourcesUtils.getResource("pdf-ocr")); + } catch (IOException e) { + e.printStackTrace(); + } + pdf_ocr = root; + } else { + root = pdf_ocr; + } + common_method(); + } + private void common_method() { splitPane.getItems().remove(1); diff --git a/src/main/java/com/zhangmeng/tools/utils/PlateUtil.java b/src/main/java/com/zhangmeng/tools/utils/PlateUtil.java index 5d18e19..39f89f2 100644 --- a/src/main/java/com/zhangmeng/tools/utils/PlateUtil.java +++ b/src/main/java/com/zhangmeng/tools/utils/PlateUtil.java @@ -4,6 +4,9 @@ package com.zhangmeng.tools.utils; import com.google.zxing.*; import com.google.zxing.client.j2se.BufferedImageLuminanceSource; import com.google.zxing.common.HybridBinarizer; +import com.itextpdf.text.pdf.PdfReader; +import com.itextpdf.text.pdf.parser.PdfTextExtractor; + import lombok.extern.slf4j.Slf4j; import org.opencv.core.Core; import org.opencv.core.Mat; @@ -11,9 +14,13 @@ import org.opencv.core.MatOfByte; import org.opencv.core.MatOfInt; import org.opencv.imgcodecs.Imgcodecs; import org.opencv.imgproc.Imgproc; +import org.springframework.web.multipart.MultipartFile; import java.awt.image.BufferedImage; import java.awt.image.DataBufferByte; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; import java.util.HashMap; import java.util.Map; @@ -170,446 +177,17 @@ public class PlateUtil { IMWRITE_PNG_STRATEGY_FIXED = 4, IMWRITE_PXM_BINARY = 32; - - // - // C++: Mat imdecode(Mat buf, int flags) - // - /** - *

Reads an image from a buffer in memory.

- * - *

The function reads an image from the specified buffer in the memory. - * If the buffer is too short or contains invalid data, the empty matrix/image - * is returned.

- * - *

See "imread" for the list of supported formats and flags description.

- * - *

Note: In the case of color images, the decoded images will have the channels - * stored in B G R order.

- * - * @param buf Input array or vector of bytes. - * @param flags The same flags as in "imread". - * @see org.opencv.highgui.Highgui.imdecode + * 条形码识别 */ - public static Mat imdecode(Mat buf, int flags) { - - Mat retVal = new Mat(imdecode_0(buf.nativeObj, flags)); - - return retVal; - } - - - // - // C++: bool imencode(string ext, Mat img, vector_uchar& buf, vector_int params = vector()) - // - - /** - *

Encodes an image into a memory buffer.

- * - *

The function compresses the image and stores it in the memory buffer that is - * resized to fit the result. - * See "imwrite" for the list of supported formats and flags description.

- * - *

Note: cvEncodeImage returns single-row matrix of type - * CV_8UC1 that contains encoded image as array of bytes.

- * - * @param ext File extension that defines the output format. - * @param img Image to be written. - * @param buf Output buffer resized to fit the compressed image. - * @param params Format-specific parameters. See "imwrite". - * @see org.opencv.highgui.Highgui.imencode - */ - public static boolean imencode(String ext, Mat img, MatOfByte buf, MatOfInt params) { - Mat buf_mat = buf; - Mat params_mat = params; - boolean retVal = imencode_0(ext, img.nativeObj, buf_mat.nativeObj, params_mat.nativeObj); - - return retVal; - } - - /** - *

Encodes an image into a memory buffer.

- * - *

The function compresses the image and stores it in the memory buffer that is - * resized to fit the result. - * See "imwrite" for the list of supported formats and flags description.

- * - *

Note: cvEncodeImage returns single-row matrix of type - * CV_8UC1 that contains encoded image as array of bytes.

- * - * @param ext File extension that defines the output format. - * @param img Image to be written. - * @param buf Output buffer resized to fit the compressed image. - * @see org.opencv.highgui.Highgui.imencode - */ - public static boolean imencode(String ext, Mat img, MatOfByte buf) { - Mat buf_mat = buf; - boolean retVal = imencode_1(ext, img.nativeObj, buf_mat.nativeObj); - - return retVal; - } - - - // - // C++: Mat imread(string filename, int flags = 1) - // - - /** - *

Loads an image from a file.

- * - *

The function imread loads an image from the specified file and - * returns it. If the image cannot be read (because of missing file, improper - * permissions, unsupported or invalid format), the function returns an empty - * matrix (Mat.data==NULL). Currently, the following file formats - * are supported:

- *
    - *
  • Windows bitmaps - *.bmp, *.dib (always supported) - *
  • JPEG files - *.jpeg, *.jpg, *.jpe (see the *Notes* - * section) - *
  • JPEG 2000 files - *.jp2 (see the *Notes* section) - *
  • Portable Network Graphics - *.png (see the *Notes* - * section) - *
  • Portable image format - *.pbm, *.pgm, *.ppm (always - * supported) - *
  • Sun rasters - *.sr, *.ras (always supported) - *
  • TIFF files - *.tiff, *.tif (see the *Notes* section) - *
- * - *

Note:

- *
    - *
  • The function determines the type of an image by the content, not by - * the file extension. - *
  • On Microsoft Windows* OS and MacOSX*, the codecs shipped with an - * OpenCV image (libjpeg, libpng, libtiff, and libjasper) are used by default. - * So, OpenCV can always read JPEGs, PNGs, and TIFFs. On MacOSX, there is also - * an option to use native MacOSX image readers. But beware that currently these - * native image loaders give images with different pixel values because of the - * color management embedded into MacOSX. - *
  • On Linux*, BSD flavors and other Unix-like open-source operating - * systems, OpenCV looks for codecs supplied with an OS image. Install the - * relevant packages (do not forget the development files, for example, - * "libjpeg-dev", in Debian* and Ubuntu*) to get the codec support or turn on - * the OPENCV_BUILD_3RDPARTY_LIBS flag in CMake. - *
- * - *

Note: In the case of color images, the decoded images will have the channels - * stored in B G R order.

- * - * @param filename Name of file to be loaded. - * @param flags Flags specifying the color type of a loaded image: - *
    - *
  • CV_LOAD_IMAGE_ANYDEPTH - If set, return 16-bit/32-bit image when the - * input has the corresponding depth, otherwise convert it to 8-bit. - *
  • CV_LOAD_IMAGE_COLOR - If set, always convert image to the color one - *
  • CV_LOAD_IMAGE_GRAYSCALE - If set, always convert image to the - * grayscale one - *
  • >0 Return a 3-channel color image. - *
- *

Note: In the current implementation the alpha channel, if any, is stripped - * from the output image. Use negative value if you need the alpha channel.

- *
    - *
  • =0 Return a grayscale image. - *
  • <0 Return the loaded image as is (with alpha channel). - *
- * @see org.opencv.highgui.Highgui.imread - */ - public static Mat imread(String filename, int flags) { - - Mat retVal = new Mat(imread_0(filename, flags)); - - return retVal; - } - - /** - *

Loads an image from a file.

- * - *

The function imread loads an image from the specified file and - * returns it. If the image cannot be read (because of missing file, improper - * permissions, unsupported or invalid format), the function returns an empty - * matrix (Mat.data==NULL). Currently, the following file formats - * are supported:

- *
    - *
  • Windows bitmaps - *.bmp, *.dib (always supported) - *
  • JPEG files - *.jpeg, *.jpg, *.jpe (see the *Notes* - * section) - *
  • JPEG 2000 files - *.jp2 (see the *Notes* section) - *
  • Portable Network Graphics - *.png (see the *Notes* - * section) - *
  • Portable image format - *.pbm, *.pgm, *.ppm (always - * supported) - *
  • Sun rasters - *.sr, *.ras (always supported) - *
  • TIFF files - *.tiff, *.tif (see the *Notes* section) - *
- * - *

Note:

- *
    - *
  • The function determines the type of an image by the content, not by - * the file extension. - *
  • On Microsoft Windows* OS and MacOSX*, the codecs shipped with an - * OpenCV image (libjpeg, libpng, libtiff, and libjasper) are used by default. - * So, OpenCV can always read JPEGs, PNGs, and TIFFs. On MacOSX, there is also - * an option to use native MacOSX image readers. But beware that currently these - * native image loaders give images with different pixel values because of the - * color management embedded into MacOSX. - *
  • On Linux*, BSD flavors and other Unix-like open-source operating - * systems, OpenCV looks for codecs supplied with an OS image. Install the - * relevant packages (do not forget the development files, for example, - * "libjpeg-dev", in Debian* and Ubuntu*) to get the codec support or turn on - * the OPENCV_BUILD_3RDPARTY_LIBS flag in CMake. - *
- * - *

Note: In the case of color images, the decoded images will have the channels - * stored in B G R order.

- * - * @param filename Name of file to be loaded. - * @see org.opencv.highgui.Highgui.imread - */ - public static Mat imread(String filename) { - - Mat retVal = new Mat(imread_1(filename)); - - return retVal; - } - - - // - // C++: bool imwrite(string filename, Mat img, vector_int params = vector()) - // - - /** - *

Saves an image to a specified file.

- * - *

The function imwrite saves the image to the specified file. The - * image format is chosen based on the filename extension (see - * "imread" for the list of extensions). Only 8-bit (or 16-bit unsigned - * (CV_16U) in case of PNG, JPEG 2000, and TIFF) single-channel or - * 3-channel (with 'BGR' channel order) images can be saved using this function. - * If the format, depth or channel order is different, use "Mat.convertTo", and - * "cvtColor" to convert it before saving. Or, use the universal "FileStorage" - * I/O functions to save the image to XML or YAML format. - * It is possible to store PNG images with an alpha channel using this function. - * To do this, create 8-bit (or 16-bit) 4-channel image BGRA, where the alpha - * channel goes last. Fully transparent pixels should have alpha set to 0, fully - * opaque pixels should have alpha set to 255/65535. The sample below shows how - * to create such a BGRA image and store to PNG file. It also demonstrates how - * to set custom compression parameters

- * - *

// C++ code:

- * - *

#include

- * - *

#include

- * - *

#include

- * - *

using namespace cv;

- * - *

using namespace std;

- * - *

void createAlphaMat(Mat &mat)

- * - * - *

CV_Assert(mat.channels() == 4);

- * - *

for (int i = 0; i < mat.rows; ++i) {

- * - *

for (int j = 0; j < mat.cols; ++j) {

- * - *

Vec4b& bgra = mat.at(i, j);

- * - *

bgra[0] = UCHAR_MAX; // Blue

- * - *

bgra[1] = saturate_cast((float (mat.cols - j)) / ((float)mat.cols) * - * UCHAR_MAX); // Green

- * - *

bgra[2] = saturate_cast((float (mat.rows - i)) / ((float)mat.rows) * - * UCHAR_MAX); // Red

- * - *

bgra[3] = saturate_cast(0.5 * (bgra[1] + bgra[2])); // Alpha

- * - * - * - * - *

int main(int argv, char argc)

- * - * - *

// Create mat with alpha channel

- * - *

Mat mat(480, 640, CV_8UC4);

- * - *

createAlphaMat(mat);

- * - *

vector compression_params;

- * - *

compression_params.push_back(CV_IMWRITE_PNG_COMPRESSION);

- * - *

compression_params.push_back(9);

- * - *

try {

- * - *

imwrite("alpha.png", mat, compression_params);

- * - * - *

catch (runtime_error& ex) {

- * - *

fprintf(stderr, "Exception converting image to PNG format: %sn", ex.what());

- * - *

return 1;

- * - * - *

fprintf(stdout, "Saved PNG file with alpha data.n");

- * - *

return 0;

- * - * @param filename Name of the file. - * @param img a img - * @param params Format-specific save parameters encoded as pairs - * paramId_1, paramValue_1, paramId_2, paramValue_2,.... The - * following parameters are currently supported: - *
    - *
  • For JPEG, it can be a quality (CV_IMWRITE_JPEG_QUALITY) - * from 0 to 100 (the higher is the better). Default value is 95. - *
  • For PNG, it can be the compression level (CV_IMWRITE_PNG_COMPRESSION) - * from 0 to 9. A higher value means a smaller size and longer compression time. - * Default value is 3. - *
  • For PPM, PGM, or PBM, it can be a binary format flag (CV_IMWRITE_PXM_BINARY), - * 0 or 1. Default value is 1. - *
- * @see org.opencv.highgui.Highgui.imwrite - */ - public static boolean imwrite(String filename, Mat img, MatOfInt params) { - Mat params_mat = params; - boolean retVal = imwrite_0(filename, img.nativeObj, params_mat.nativeObj); - - return retVal; - } - - /** - *

Saves an image to a specified file.

- * - *

The function imwrite saves the image to the specified file. The - * image format is chosen based on the filename extension (see - * "imread" for the list of extensions). Only 8-bit (or 16-bit unsigned - * (CV_16U) in case of PNG, JPEG 2000, and TIFF) single-channel or - * 3-channel (with 'BGR' channel order) images can be saved using this function. - * If the format, depth or channel order is different, use "Mat.convertTo", and - * "cvtColor" to convert it before saving. Or, use the universal "FileStorage" - * I/O functions to save the image to XML or YAML format. - * It is possible to store PNG images with an alpha channel using this function. - * To do this, create 8-bit (or 16-bit) 4-channel image BGRA, where the alpha - * channel goes last. Fully transparent pixels should have alpha set to 0, fully - * opaque pixels should have alpha set to 255/65535. The sample below shows how - * to create such a BGRA image and store to PNG file. It also demonstrates how - * to set custom compression parameters

- * - *

// C++ code:

- * - *

#include

- * - *

#include

- * - *

#include

- * - *

using namespace cv;

- * - *

using namespace std;

- * - *

void createAlphaMat(Mat &mat)

- * - * - *

CV_Assert(mat.channels() == 4);

- * - *

for (int i = 0; i < mat.rows; ++i) {

- * - *

for (int j = 0; j < mat.cols; ++j) {

- * - *

Vec4b& bgra = mat.at(i, j);

- * - *

bgra[0] = UCHAR_MAX; // Blue

- * - *

bgra[1] = saturate_cast((float (mat.cols - j)) / ((float)mat.cols) * - * UCHAR_MAX); // Green

- * - *

bgra[2] = saturate_cast((float (mat.rows - i)) / ((float)mat.rows) * - * UCHAR_MAX); // Red

- * - *

bgra[3] = saturate_cast(0.5 * (bgra[1] + bgra[2])); // Alpha

- * - * - * - * - *

int main(int argv, char argc)

- * - * - *

// Create mat with alpha channel

- * - *

Mat mat(480, 640, CV_8UC4);

- * - *

createAlphaMat(mat);

- * - *

vector compression_params;

- * - *

compression_params.push_back(CV_IMWRITE_PNG_COMPRESSION);

- * - *

compression_params.push_back(9);

- * - *

try {

- * - *

imwrite("alpha.png", mat, compression_params);

- * - * - *

catch (runtime_error& ex) {

- * - *

fprintf(stderr, "Exception converting image to PNG format: %sn", ex.what());

- * - *

return 1;

- * - * - *

fprintf(stdout, "Saved PNG file with alpha data.n");

- * - *

return 0;

- * - * @param filename Name of the file. - * @param img a img - * @see org.opencv.highgui.Highgui.imwrite - */ - public static boolean imwrite(String filename, Mat img) { - - boolean retVal = imwrite_1(filename, img.nativeObj); - - return retVal; - } - - - // C++: Mat imdecode(Mat buf, int flags) - private static native long imdecode_0(long buf_nativeObj, int flags); - - // C++: bool imencode(string ext, Mat img, vector_uchar& buf, vector_int params = vector()) - private static native boolean imencode_0(String ext, long img_nativeObj, long buf_mat_nativeObj, long params_mat_nativeObj); - - private static native boolean imencode_1(String ext, long img_nativeObj, long buf_mat_nativeObj); - - // C++: Mat imread(string filename, int flags = 1) - private static native long imread_0(String filename, int flags); - - private static native long imread_1(String filename); - - // C++: bool imwrite(string filename, Mat img, vector_int params = vector()) - private static native boolean imwrite_0(String filename, long img_nativeObj, long params_mat_nativeObj); - - private static native boolean imwrite_1(String filename, long img_nativeObj); - - /** - * @param binaryType 二值化类型 - * @param adaptiveMethod - */ - public static String zxing_bar_code(String sourcePath,Integer adaptiveMethod, Integer binaryType) { + public static String zxing_bar_code(String sourcePath) { System.loadLibrary(Core.NATIVE_LIBRARY_NAME); // 加载为灰度图显示 Mat source = Imgcodecs.imread(sourcePath,Imgcodecs.IMREAD_GRAYSCALE); Mat destination = new Mat(source.rows(), source.cols(), source.type()); + //二值化类型 Imgproc.threshold(source, destination, 190, 255, Imgproc.THRESH_BINARY); String result = parseCode(destination); log.info("result:{}",result); @@ -629,11 +207,9 @@ public class PlateUtil { hints.put(DecodeHintType.CHARACTER_SET, "UTF-8"); Result result = formatReader.decode(binaryBitmap, hints); - StringBuffer sbuffer = new StringBuffer(); - sbuffer.append("解析结果 = " + result.toString() + "\n"); - sbuffer.append("格式类型 = " + result.getBarcodeFormat() + "\n"); - sbuffer.append("文本内容 = " + result.getText() + "\n"); - resultText = sbuffer.toString(); + resultText = "解析结果 = " + result.toString() + "\n" + + "格式类型 = " + result.getBarcodeFormat() + "\n" + + "文本内容 = " + result.getText() + "\n"; } catch (Exception e) { e.printStackTrace(); } @@ -653,4 +229,20 @@ public class PlateUtil { System.arraycopy(b, 0, targetPixels, 0, b.length); return image; } + + public static String getPdfContent(File file) { + PdfReader reader = null; + StringBuilder buff = new StringBuilder(); + try { + reader = new PdfReader(new FileInputStream(file)); + int num = reader.getNumberOfPages();// 获得页数 + for (int i = 1; i <= num; i++) { + String textFromPage = PdfTextExtractor.getTextFromPage(reader, i); + buff.append(textFromPage); + } + } catch (IOException e) { + e.printStackTrace(); + } + return buff.toString(); + } } diff --git a/src/main/java/com/zhangmeng/tools/utils/ResourcesUtils.java b/src/main/java/com/zhangmeng/tools/utils/ResourcesUtils.java index 51fd7a9..ddc2675 100644 --- a/src/main/java/com/zhangmeng/tools/utils/ResourcesUtils.java +++ b/src/main/java/com/zhangmeng/tools/utils/ResourcesUtils.java @@ -123,6 +123,7 @@ public class ResourcesUtils { Maven_Install_Jar("maven安装jar",10), Word_ocr("文字识别ocr",11), Bar_Code("条形码识别",12), + Pdf_Ocr("pdf识别",13), ; SmallTools(String title, int index) { diff --git a/src/main/resources/fxml/home.fxml b/src/main/resources/fxml/home.fxml index 9b19f36..f6e8f20 100644 --- a/src/main/resources/fxml/home.fxml +++ b/src/main/resources/fxml/home.fxml @@ -42,7 +42,8 @@ - + + diff --git a/src/main/resources/fxml/pdf-ocr.fxml b/src/main/resources/fxml/pdf-ocr.fxml new file mode 100644 index 0000000..45f812e --- /dev/null +++ b/src/main/resources/fxml/pdf-ocr.fxml @@ -0,0 +1,18 @@ + + + + + + + + + + +