pdf 识别 2023年4月1日11:06:23

master
zhangmeng 2023-04-01 11:12:18 +08:00
parent a72f4e12e7
commit 1a264d9864
11 changed files with 193 additions and 439 deletions

View File

@ -100,6 +100,12 @@
![](./src/main/resources/static/redame/img_41.png)
#### 3.14 pdf内容识别
> 开源项目 itext
![](./src/main/resources/static/redame/img_42.png)
### 4. 编解码工具

12
pom.xml
View File

@ -344,6 +344,18 @@
<artifactId>tess4j</artifactId>
<version>4.5.2</version>
</dependency>
<dependency>
<groupId>com.lowagie</groupId>
<artifactId>itext</artifactId>
<version>4.2.1</version>
</dependency>
<dependency>
<groupId>com.github.codemonstur</groupId>
<artifactId>jpedal</artifactId>
<version>4.92-p13</version>
</dependency>
</dependencies>
<build>

View File

@ -95,7 +95,7 @@ public class BarCodeController {
}
public void init_tess4j(File file) {
String result = PlateUtil.zxing_bar_code(file.getPath(), 1, 0);
String result = PlateUtil.zxing_bar_code(file.getPath());
res_view.setText(result);
}
}

View File

@ -496,4 +496,8 @@ public class HomeController implements Serializable {
public void bar_code_menu_item(ActionEvent event) {
load_small_tools(12);
}
/**
 * Action handler for the PDF recognition menu entry (referenced from FXML as
 * {@code #pdf_menu_item}).
 *
 * <p>Delegates to {@code load_small_tools} with tool index 13, the index the
 * PDF tool is registered under.
 *
 * @param event the menu action event; not used by this handler
 */
public void pdf_menu_item(ActionEvent event) {
load_small_tools(13);
}
}

View File

@ -0,0 +1,94 @@
package com.zhangmeng.tools.controller;
import com.zhangmeng.tools.utils.AlertUtils;
import com.zhangmeng.tools.utils.ImagePath;
import com.zhangmeng.tools.utils.PlateUtil;
import javafx.application.Platform;
import javafx.beans.property.SimpleObjectProperty;
import javafx.fxml.FXML;
import javafx.scene.control.Button;
import javafx.scene.control.TextArea;
import javafx.scene.control.TextField;
import javafx.scene.image.Image;
import javafx.scene.image.ImageView;
import javafx.stage.FileChooser;
import javafx.stage.Stage;
import lombok.extern.slf4j.Slf4j;
import java.io.File;
/**
* @author :
* @version : 1.0
* @date : 2023-04-01 10:46
*/
@Slf4j
public class PdfOcrController {
public static String tessdata = System.getProperty("user.dir");
@FXML
public Button file_choose_button;
@FXML
public TextArea res_view;
@FXML
public Button cover;
@FXML
public TextField file_path;
public static final SimpleObjectProperty<File> choose_file = new SimpleObjectProperty<>();
@FXML
public void initialize() {
file_choose_button.setText(null);
ImageView iv = new ImageView(new Image(ImagePath.path(ImagePath.ImagePathType.IMAGE_FILE)));
iv.setPreserveRatio(true);
iv.setFitWidth(18);
file_choose_button.setGraphic(iv);
cover.setText("识别");
file_choose_button.setOnAction(event -> {
choose_file();
});
cover.setOnAction(event -> {
if (choose_file.getValue() == null) {
AlertUtils.alert_warning("请选择将要识别的pdf再试!");
return;
}
File file = choose_file.getValue();
Stage alert = AlertUtils.alert_loading(cover.getScene().getWindow());
new Thread(() -> {
Platform.runLater(() -> {
init_pdf(file);
alert.close();
});
}).start();
});
}
public void choose_file() {
Stage stage = new Stage();
FileChooser dc = new FileChooser();
dc.setTitle("文件选择");
dc.getExtensionFilters().addAll(new FileChooser.ExtensionFilter("类型", "*.pdf"));
File file = dc.showOpenDialog(stage);
if (file != null) {
String path = file.getAbsolutePath();
file_path.setText(path);
log.info("file_path:{}", path);
choose_file.set(file);
}
}
public void init_pdf(File file) {
String result = PlateUtil.getPdfContent(file);
res_view.setText(result);
}
}

View File

@ -80,6 +80,7 @@ public class SmallToolsController {
private AnchorPane maven_install_jar;
private AnchorPane word_ocr;
private AnchorPane bar_code;
private AnchorPane pdf_ocr;
@FXML
private ListView<ResourcesUtils.SmallTools> listView;
@ -385,6 +386,13 @@ public class SmallToolsController {
}
bar_code(flag);
}
if (newValue.getIndex() == 13) {
if (pdf_ocr != null) {
flag = true;
}
pdf_ocr(flag);
}
}
});
}
@ -407,6 +415,7 @@ public class SmallToolsController {
case Maven_Install_Jar -> new Image(ImagePath.path(ImagePath.ImagePathType.Qr_CODE));
case Word_ocr -> new Image(ImagePath.path(ImagePath.ImagePathType.Qr_CODE));
case Bar_Code -> new Image(ImagePath.path(ImagePath.ImagePathType.Qr_CODE));
case Pdf_Ocr -> new Image(ImagePath.path(ImagePath.ImagePathType.Qr_CODE));
};
}
@ -677,6 +686,23 @@ public class SmallToolsController {
common_method();
}
/**
 * Shows the PDF recognition tool.
 *
 * <p>The pane is loaded from the {@code "pdf-ocr"} FXML resource the first
 * time and cached in the {@code pdf_ocr} field afterwards. If loading fails
 * nothing is cached and the currently displayed pane is left untouched, so a
 * stale pane belonging to another tool can never be stored as the PDF pane.
 *
 * @param flag {@code true} to reuse the cached pane, {@code false} to load it
 *             from FXML first
 */
public void pdf_ocr(boolean flag) {
    // Keep the list selection in sync with the tool being shown (position 13).
    listView.getSelectionModel().select(13);
    if (!flag) {
        try {
            pdf_ocr = FXMLLoader.load(ResourcesUtils.getResource("pdf-ocr"));
        } catch (IOException e) {
            e.printStackTrace();
            // Do not cache or display anything when the view could not be loaded.
            return;
        }
    }
    root = pdf_ocr;
    common_method();
}
private void common_method() {
splitPane.getItems().remove(1);

View File

@ -4,6 +4,9 @@ package com.zhangmeng.tools.utils;
import com.google.zxing.*;
import com.google.zxing.client.j2se.BufferedImageLuminanceSource;
import com.google.zxing.common.HybridBinarizer;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;
import lombok.extern.slf4j.Slf4j;
import org.opencv.core.Core;
import org.opencv.core.Mat;
@ -11,9 +14,13 @@ import org.opencv.core.MatOfByte;
import org.opencv.core.MatOfInt;
import org.opencv.imgcodecs.Imgcodecs;
import org.opencv.imgproc.Imgproc;
import org.springframework.web.multipart.MultipartFile;
import java.awt.image.BufferedImage;
import java.awt.image.DataBufferByte;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
@ -170,446 +177,17 @@ public class PlateUtil {
IMWRITE_PNG_STRATEGY_FIXED = 4,
IMWRITE_PXM_BINARY = 32;
//
// C++: Mat imdecode(Mat buf, int flags)
//
/**
* <p>Reads an image from a buffer in memory.</p>
*
* <p>The function reads an image from the specified buffer in the memory.
* If the buffer is too short or contains invalid data, the empty matrix/image
* is returned.</p>
*
* <p>See "imread" for the list of supported formats and flags description.</p>
*
* <p>Note: In the case of color images, the decoded images will have the channels
* stored in <code>B G R</code> order.</p>
*
* @param buf Input array or vector of bytes.
* @param flags The same flags as in "imread".
* @see <a href="http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html#imdecode">org.opencv.highgui.Highgui.imdecode</a>
*
*/
public static Mat imdecode(Mat buf, int flags) {
    // Wrap the native handle returned by the JNI call directly.
    return new Mat(imdecode_0(buf.nativeObj, flags));
}
//
// C++: bool imencode(string ext, Mat img, vector_uchar& buf, vector_int params = vector<int>())
//
/**
* <p>Encodes an image into a memory buffer.</p>
*
* <p>The function compresses the image and stores it in the memory buffer that is
* resized to fit the result.
* See "imwrite" for the list of supported formats and flags description.</p>
*
* <p>Note: <code>cvEncodeImage</code> returns single-row matrix of type
* <code>CV_8UC1</code> that contains encoded image as array of bytes.</p>
*
* @param ext File extension that defines the output format.
* @param img Image to be written.
* @param buf Output buffer resized to fit the compressed image.
* @param params Format-specific parameters. See "imwrite".
* @see <a href="http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html#imencode">org.opencv.highgui.Highgui.imencode</a>
*/
public static boolean imencode(String ext, Mat img, MatOfByte buf, MatOfInt params) {
    // Pass the native handles of the image, output buffer and parameter list to JNI.
    return imencode_0(ext, img.nativeObj, buf.nativeObj, params.nativeObj);
}
/**
* <p>Encodes an image into a memory buffer.</p>
*
* <p>The function compresses the image and stores it in the memory buffer that is
* resized to fit the result.
* See "imwrite" for the list of supported formats and flags description.</p>
*
* <p>Note: <code>cvEncodeImage</code> returns single-row matrix of type
* <code>CV_8UC1</code> that contains encoded image as array of bytes.</p>
*
* @param ext File extension that defines the output format.
* @param img Image to be written.
* @param buf Output buffer resized to fit the compressed image.
* @see <a href="http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html#imencode">org.opencv.highgui.Highgui.imencode</a>
*/
public static boolean imencode(String ext, Mat img, MatOfByte buf) {
    // Overload without format-specific parameters.
    return imencode_1(ext, img.nativeObj, buf.nativeObj);
}
//
// C++: Mat imread(string filename, int flags = 1)
//
/**
* <p>Loads an image from a file.</p>
*
* <p>The function <code>imread</code> loads an image from the specified file and
* returns it. If the image cannot be read (because of missing file, improper
* permissions, unsupported or invalid format), the function returns an empty
* matrix (<code>Mat.data==NULL</code>). Currently, the following file formats
* are supported:</p>
* <ul>
* <li> Windows bitmaps - <code>*.bmp, *.dib</code> (always supported)
* <li> JPEG files - <code>*.jpeg, *.jpg, *.jpe</code> (see the *Notes*
* section)
* <li> JPEG 2000 files - <code>*.jp2</code> (see the *Notes* section)
* <li> Portable Network Graphics - <code>*.png</code> (see the *Notes*
* section)
* <li> Portable image format - <code>*.pbm, *.pgm, *.ppm</code> (always
* supported)
* <li> Sun rasters - <code>*.sr, *.ras</code> (always supported)
* <li> TIFF files - <code>*.tiff, *.tif</code> (see the *Notes* section)
* </ul>
*
* <p>Note:</p>
* <ul>
* <li> The function determines the type of an image by the content, not by
* the file extension.
* <li> On Microsoft Windows* OS and MacOSX*, the codecs shipped with an
* OpenCV image (libjpeg, libpng, libtiff, and libjasper) are used by default.
* So, OpenCV can always read JPEGs, PNGs, and TIFFs. On MacOSX, there is also
* an option to use native MacOSX image readers. But beware that currently these
* native image loaders give images with different pixel values because of the
* color management embedded into MacOSX.
* <li> On Linux*, BSD flavors and other Unix-like open-source operating
* systems, OpenCV looks for codecs supplied with an OS image. Install the
* relevant packages (do not forget the development files, for example,
* "libjpeg-dev", in Debian* and Ubuntu*) to get the codec support or turn on
* the <code>OPENCV_BUILD_3RDPARTY_LIBS</code> flag in CMake.
* </ul>
*
* <p>Note: In the case of color images, the decoded images will have the channels
* stored in <code>B G R</code> order.</p>
*
* @param filename Name of file to be loaded.
* @param flags Flags specifying the color type of a loaded image:
* <ul>
* <li> CV_LOAD_IMAGE_ANYDEPTH - If set, return 16-bit/32-bit image when the
* input has the corresponding depth, otherwise convert it to 8-bit.
* <li> CV_LOAD_IMAGE_COLOR - If set, always convert image to the color one
* <li> CV_LOAD_IMAGE_GRAYSCALE - If set, always convert image to the
* grayscale one
* <li> >0 Return a 3-channel color image.
* </ul>
* <p>Note: In the current implementation the alpha channel, if any, is stripped
* from the output image. Use negative value if you need the alpha channel.</p>
* <ul>
* <li> =0 Return a grayscale image.
* <li> <0 Return the loaded image as is (with alpha channel).
* </ul>
* @see <a href="http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html#imread">org.opencv.highgui.Highgui.imread</a>
*/
public static Mat imread(String filename, int flags) {
    // Wrap the native handle returned by the JNI call directly.
    return new Mat(imread_0(filename, flags));
}
/**
* <p>Loads an image from a file.</p>
*
* <p>The function <code>imread</code> loads an image from the specified file and
* returns it. If the image cannot be read (because of missing file, improper
* permissions, unsupported or invalid format), the function returns an empty
* matrix (<code>Mat.data==NULL</code>). Currently, the following file formats
* are supported:</p>
* <ul>
* <li> Windows bitmaps - <code>*.bmp, *.dib</code> (always supported)
* <li> JPEG files - <code>*.jpeg, *.jpg, *.jpe</code> (see the *Notes*
* section)
* <li> JPEG 2000 files - <code>*.jp2</code> (see the *Notes* section)
* <li> Portable Network Graphics - <code>*.png</code> (see the *Notes*
* section)
* <li> Portable image format - <code>*.pbm, *.pgm, *.ppm</code> (always
* supported)
* <li> Sun rasters - <code>*.sr, *.ras</code> (always supported)
* <li> TIFF files - <code>*.tiff, *.tif</code> (see the *Notes* section)
* </ul>
*
* <p>Note:</p>
* <ul>
* <li> The function determines the type of an image by the content, not by
* the file extension.
* <li> On Microsoft Windows* OS and MacOSX*, the codecs shipped with an
* OpenCV image (libjpeg, libpng, libtiff, and libjasper) are used by default.
* So, OpenCV can always read JPEGs, PNGs, and TIFFs. On MacOSX, there is also
* an option to use native MacOSX image readers. But beware that currently these
* native image loaders give images with different pixel values because of the
* color management embedded into MacOSX.
* <li> On Linux*, BSD flavors and other Unix-like open-source operating
* systems, OpenCV looks for codecs supplied with an OS image. Install the
* relevant packages (do not forget the development files, for example,
* "libjpeg-dev", in Debian* and Ubuntu*) to get the codec support or turn on
* the <code>OPENCV_BUILD_3RDPARTY_LIBS</code> flag in CMake.
* </ul>
*
* <p>Note: In the case of color images, the decoded images will have the channels
* stored in <code>B G R</code> order.</p>
*
* @param filename Name of file to be loaded.
* @see <a href="http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html#imread">org.opencv.highgui.Highgui.imread</a>
*/
public static Mat imread(String filename) {
    // Overload that passes no flags argument to the native call.
    return new Mat(imread_1(filename));
}
//
// C++: bool imwrite(string filename, Mat img, vector_int params = vector<int>())
//
/**
* <p>Saves an image to a specified file.</p>
*
* <p>The function <code>imwrite</code> saves the image to the specified file. The
* image format is chosen based on the <code>filename</code> extension (see
* "imread" for the list of extensions). Only 8-bit (or 16-bit unsigned
* (<code>CV_16U</code>) in case of PNG, JPEG 2000, and TIFF) single-channel or
* 3-channel (with 'BGR' channel order) images can be saved using this function.
* If the format, depth or channel order is different, use "Mat.convertTo", and
* "cvtColor" to convert it before saving. Or, use the universal "FileStorage"
* I/O functions to save the image to XML or YAML format.
* It is possible to store PNG images with an alpha channel using this function.
* To do this, create 8-bit (or 16-bit) 4-channel image BGRA, where the alpha
* channel goes last. Fully transparent pixels should have alpha set to 0, fully
* opaque pixels should have alpha set to 255/65535. The sample below shows how
* to create such a BGRA image and store to PNG file. It also demonstrates how
* to set custom compression parameters <code></p>
*
* <p>// C++ code:</p>
*
* <p>#include <vector></p>
*
* <p>#include <stdio.h></p>
*
* <p>#include <opencv2/opencv.hpp></p>
*
* <p>using namespace cv;</p>
*
* <p>using namespace std;</p>
*
* <p>void createAlphaMat(Mat &mat)</p>
*
*
* <p>CV_Assert(mat.channels() == 4);</p>
*
* <p>for (int i = 0; i < mat.rows; ++i) {</p>
*
* <p>for (int j = 0; j < mat.cols; ++j) {</p>
*
* <p>Vec4b& bgra = mat.at<Vec4b>(i, j);</p>
*
* <p>bgra[0] = UCHAR_MAX; // Blue</p>
*
* <p>bgra[1] = saturate_cast<uchar>((float (mat.cols - j)) / ((float)mat.cols) *
* UCHAR_MAX); // Green</p>
*
* <p>bgra[2] = saturate_cast<uchar>((float (mat.rows - i)) / ((float)mat.rows) *
* UCHAR_MAX); // Red</p>
*
* <p>bgra[3] = saturate_cast<uchar>(0.5 * (bgra[1] + bgra[2])); // Alpha</p>
*
*
*
*
* <p>int main(int argv, char argc)</p>
*
*
* <p>// Create mat with alpha channel</p>
*
* <p>Mat mat(480, 640, CV_8UC4);</p>
*
* <p>createAlphaMat(mat);</p>
*
* <p>vector<int> compression_params;</p>
*
* <p>compression_params.push_back(CV_IMWRITE_PNG_COMPRESSION);</p>
*
* <p>compression_params.push_back(9);</p>
*
* <p>try {</p>
*
* <p>imwrite("alpha.png", mat, compression_params);</p>
*
*
* <p>catch (runtime_error& ex) {</p>
*
* <p>fprintf(stderr, "Exception converting image to PNG format: %sn", ex.what());</p>
*
* <p>return 1;</p>
*
*
* <p>fprintf(stdout, "Saved PNG file with alpha data.n");</p>
*
* <p>return 0;</p>
*
* @param filename Name of the file.
* @param img a img
* @param params Format-specific save parameters encoded as pairs
* <code>paramId_1, paramValue_1, paramId_2, paramValue_2,...</code>. The
* following parameters are currently supported:
* <ul>
* <li> For JPEG, it can be a quality (<code>CV_IMWRITE_JPEG_QUALITY</code>)
* from 0 to 100 (the higher is the better). Default value is 95.
* <li> For PNG, it can be the compression level (<code>CV_IMWRITE_PNG_COMPRESSION</code>)
* from 0 to 9. A higher value means a smaller size and longer compression time.
* Default value is 3.
* <li> For PPM, PGM, or PBM, it can be a binary format flag (<code>CV_IMWRITE_PXM_BINARY</code>),
* 0 or 1. Default value is 1.
* </ul>
* @see <a href="http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html#imwrite">org.opencv.highgui.Highgui.imwrite</a>
*/
public static boolean imwrite(String filename, Mat img, MatOfInt params) {
    // Pass the native handles of the image and the parameter list to JNI.
    return imwrite_0(filename, img.nativeObj, params.nativeObj);
}
/**
* <p>Saves an image to a specified file.</p>
*
* <p>The function <code>imwrite</code> saves the image to the specified file. The
* image format is chosen based on the <code>filename</code> extension (see
* "imread" for the list of extensions). Only 8-bit (or 16-bit unsigned
* (<code>CV_16U</code>) in case of PNG, JPEG 2000, and TIFF) single-channel or
* 3-channel (with 'BGR' channel order) images can be saved using this function.
* If the format, depth or channel order is different, use "Mat.convertTo", and
* "cvtColor" to convert it before saving. Or, use the universal "FileStorage"
* I/O functions to save the image to XML or YAML format.
* It is possible to store PNG images with an alpha channel using this function.
* To do this, create 8-bit (or 16-bit) 4-channel image BGRA, where the alpha
* channel goes last. Fully transparent pixels should have alpha set to 0, fully
* opaque pixels should have alpha set to 255/65535. The sample below shows how
* to create such a BGRA image and store to PNG file. It also demonstrates how
* to set custom compression parameters <code></p>
*
* <p>// C++ code:</p>
*
* <p>#include <vector></p>
*
* <p>#include <stdio.h></p>
*
* <p>#include <opencv2/opencv.hpp></p>
*
* <p>using namespace cv;</p>
*
* <p>using namespace std;</p>
*
* <p>void createAlphaMat(Mat &mat)</p>
*
*
* <p>CV_Assert(mat.channels() == 4);</p>
*
* <p>for (int i = 0; i < mat.rows; ++i) {</p>
*
* <p>for (int j = 0; j < mat.cols; ++j) {</p>
*
* <p>Vec4b& bgra = mat.at<Vec4b>(i, j);</p>
*
* <p>bgra[0] = UCHAR_MAX; // Blue</p>
*
* <p>bgra[1] = saturate_cast<uchar>((float (mat.cols - j)) / ((float)mat.cols) *
* UCHAR_MAX); // Green</p>
*
* <p>bgra[2] = saturate_cast<uchar>((float (mat.rows - i)) / ((float)mat.rows) *
* UCHAR_MAX); // Red</p>
*
* <p>bgra[3] = saturate_cast<uchar>(0.5 * (bgra[1] + bgra[2])); // Alpha</p>
*
*
*
*
* <p>int main(int argv, char argc)</p>
*
*
* <p>// Create mat with alpha channel</p>
*
* <p>Mat mat(480, 640, CV_8UC4);</p>
*
* <p>createAlphaMat(mat);</p>
*
* <p>vector<int> compression_params;</p>
*
* <p>compression_params.push_back(CV_IMWRITE_PNG_COMPRESSION);</p>
*
* <p>compression_params.push_back(9);</p>
*
* <p>try {</p>
*
* <p>imwrite("alpha.png", mat, compression_params);</p>
*
*
* <p>catch (runtime_error& ex) {</p>
*
* <p>fprintf(stderr, "Exception converting image to PNG format: %sn", ex.what());</p>
*
* <p>return 1;</p>
*
*
* <p>fprintf(stdout, "Saved PNG file with alpha data.n");</p>
*
* <p>return 0;</p>
*
* @param filename Name of the file.
* @param img a img
* @see <a href="http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html#imwrite">org.opencv.highgui.Highgui.imwrite</a>
*/
public static boolean imwrite(String filename, Mat img) {
    // Overload without format-specific save parameters.
    return imwrite_1(filename, img.nativeObj);
}
// C++: Mat imdecode(Mat buf, int flags)
private static native long imdecode_0(long buf_nativeObj, int flags);
// C++: bool imencode(string ext, Mat img, vector_uchar& buf, vector_int params = vector<int>())
private static native boolean imencode_0(String ext, long img_nativeObj, long buf_mat_nativeObj, long params_mat_nativeObj);
private static native boolean imencode_1(String ext, long img_nativeObj, long buf_mat_nativeObj);
// C++: Mat imread(string filename, int flags = 1)
private static native long imread_0(String filename, int flags);
private static native long imread_1(String filename);
// C++: bool imwrite(string filename, Mat img, vector_int params = vector<int>())
private static native boolean imwrite_0(String filename, long img_nativeObj, long params_mat_nativeObj);
private static native boolean imwrite_1(String filename, long img_nativeObj);
/**
* @param binaryType
* @param adaptiveMethod
*/
public static String zxing_bar_code(String sourcePath,Integer adaptiveMethod, Integer binaryType) {
public static String zxing_bar_code(String sourcePath) {
System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
// 加载为灰度图显示
Mat source = Imgcodecs.imread(sourcePath,Imgcodecs.IMREAD_GRAYSCALE);
Mat destination = new Mat(source.rows(), source.cols(), source.type());
//二值化类型
Imgproc.threshold(source, destination, 190, 255, Imgproc.THRESH_BINARY);
String result = parseCode(destination);
log.info("result:{}",result);
@ -629,11 +207,9 @@ public class PlateUtil {
hints.put(DecodeHintType.CHARACTER_SET, "UTF-8");
Result result = formatReader.decode(binaryBitmap, hints);
StringBuffer sbuffer = new StringBuffer();
sbuffer.append("解析结果 = " + result.toString() + "\n");
sbuffer.append("格式类型 = " + result.getBarcodeFormat() + "\n");
sbuffer.append("文本内容 = " + result.getText() + "\n");
resultText = sbuffer.toString();
resultText = "解析结果 = " + result.toString() + "\n" +
"格式类型 = " + result.getBarcodeFormat() + "\n" +
"文本内容 = " + result.getText() + "\n";
} catch (Exception e) {
e.printStackTrace();
}
@ -653,4 +229,20 @@ public class PlateUtil {
System.arraycopy(b, 0, targetPixels, 0, b.length);
return image;
}
/**
 * Extracts the text of every page of a PDF file and concatenates it in page
 * order, with no separator inserted between pages.
 *
 * <p>An {@link IOException} while opening or parsing the file is printed and
 * swallowed; whatever text was collected up to that point (possibly the empty
 * string) is returned.
 *
 * @param file the PDF file to read
 * @return the concatenated page text, never {@code null}
 */
public static String getPdfContent(File file) {
    StringBuilder buff = new StringBuilder();
    PdfReader reader = null;
    // try-with-resources guarantees the file handle is released on every path.
    try (FileInputStream in = new FileInputStream(file)) {
        reader = new PdfReader(in);
        int num = reader.getNumberOfPages();
        // iText page numbers are 1-based.
        for (int i = 1; i <= num; i++) {
            buff.append(PdfTextExtractor.getTextFromPage(reader, i));
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Release the reader's resources as well; it stays null if construction failed.
        if (reader != null) {
            reader.close();
        }
    }
    return buff.toString();
}
}

View File

@ -123,6 +123,7 @@ public class ResourcesUtils {
Maven_Install_Jar("maven安装jar",10),
Word_ocr("文字识别ocr",11),
Bar_Code("条形码识别",12),
Pdf_Ocr("pdf识别",13),
;
SmallTools(String title, int index) {

View File

@ -42,7 +42,8 @@
<MenuItem mnemonicParsing="false" text="JsonView" onAction="#JsonView_menu_item"/>
<MenuItem mnemonicParsing="false" text="maven-jar-install" onAction="#maven_jar_install_menu_item"/>
<MenuItem mnemonicParsing="false" text="word-ocr" onAction="#word_ocr_menu_item"/>
<MenuItem mnemonicParsing="false" text="条形码识别r" onAction="#bar_code_menu_item"/>
<MenuItem mnemonicParsing="false" text="条形码识别" onAction="#bar_code_menu_item"/>
<MenuItem mnemonicParsing="false" text="pdf识别" onAction="#pdf_menu_item"/>
</items>
</Menu>

View File

@ -0,0 +1,18 @@
<?xml version="1.0" encoding="UTF-8"?>
<?import javafx.scene.control.Button?>
<?import javafx.scene.control.Label?>
<?import javafx.scene.control.TextArea?>
<?import javafx.scene.control.TextField?>
<?import javafx.scene.layout.AnchorPane?>
<AnchorPane prefHeight="649.0" prefWidth="1200.0" xmlns="http://javafx.com/javafx/19" xmlns:fx="http://javafx.com/fxml/1" fx:controller="com.zhangmeng.tools.controller.PdfOcrController">
<children>
<TextArea fx:id="res_view" layoutX="727.0" layoutY="112.0" prefHeight="340.0" prefWidth="956.0" AnchorPane.bottomAnchor="197.0" AnchorPane.leftAnchor="201.0" AnchorPane.rightAnchor="43.0" AnchorPane.topAnchor="112.0" />
<Button fx:id="file_choose_button" layoutX="202.0" layoutY="67.0" mnemonicParsing="false" text="Button" />
<Label layoutX="58.0" layoutY="71.0" text="请选择将要识别的pdf:" />
<TextField fx:id="file_path" layoutX="280.0" layoutY="67.0" prefHeight="25.0" prefWidth="430.0" AnchorPane.leftAnchor="280.0" AnchorPane.rightAnchor="490.0" />
<Button fx:id="cover" layoutX="390.0" layoutY="492.0" mnemonicParsing="false" text="Button" AnchorPane.bottomAnchor="134.0" />
<Label layoutX="102.0" layoutY="112.0" text="识别内容:" />
</children>
</AnchorPane>

Binary file not shown.

After

Width:  |  Height:  |  Size: 81 KiB