> CentOS > CentOS教程 >

CentOS 下将远程图片识别为文字的脚本

使用该自动脚本之前，需要先安装 Tesseract 和 ImageMagick；安装与使用教程请参阅《如何在 CentOS 下识别图片中的文字并存储到 txt 中》。

 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/bin/bash
# Script name: image2text.sh
# Author: lxy.me (http://lxy.me )
# URL: http://lxy.me/centos-remote-picture-identification-text-script.html
# Description: this script will use tesseract and ImageMagick to convert an image to text.
# Script usage: ./image2text.sh <URL> <output_file>
 
# Configuration
# Directory for temporary files; edit the value below if your system's
# temporary directory is *not* /tmp.
tmp_dir='/tmp'
 
#######################################
# Report a fatal error and abort the script.
# Arguments: $1 - text to print after the "Error: " prefix
# Outputs:   writes "Error: <text>" to stderr
# Returns:   never returns; terminates the shell with exit status 1
#######################################
error() {
  # Keep the message local so it does not leak into the script's globals.
  local message=$1

  # Diagnostics belong on stderr so they are never mixed with normal output.
  printf 'Error: %s\n' "$message" >&2
  exit 1
}
# ---------------------------------------------------------------------------
# Main flow: validate the arguments and required tools, download the image
# at <URL>, convert it to TIFF when it is not already TIFF/BMP, then run
# tesseract so that the recognised text lands in <output_file>.txt.
# Exits 0 on success and 1 on any failure.
# ---------------------------------------------------------------------------

# Check number of arguments
[ $# -eq 2 ] || error "Script usage: ./image2text.sh <URL> <output_file>"

# Check that every external tool is available. `command -v` is a shell
# builtin, so the check itself does not depend on `which` being installed.
command -v tesseract > /dev/null 2>&1 || error "Please install tesseract."

# convert and identify are both ImageMagick programs.
command -v convert > /dev/null 2>&1 || error "Please install ImageMagick."
command -v identify > /dev/null 2>&1 || error "Please install ImageMagick."

command -v wget > /dev/null 2>&1 || error "Please install wget."

URL="$1"
OUTPUT="$2"

# Create the temporary base name inside the configured temporary directory
# (falls back to /tmp when tmp_dir is not set).
TMP_NAME=$(mktemp "${tmp_dir:-/tmp}/image2text.XXXXXX") \
  || error "Unable to create a temporary file in ${tmp_dir:-/tmp}."

# Remove every temporary file this script may have created.
cleanup() {
  rm -f -- "$TMP_NAME" "$TMP_NAME.tif" "$TMP_NAME-download"
}
# Run the cleanup on every exit path, including the exits made by `error`.
trap cleanup EXIT

echo ""

if [ -f "$OUTPUT.txt" ]; then
  printf '%s' "Warning: File $OUTPUT.txt already exists. Please press enter to continue, or press CTRL+C to quit now."
  # Read from the terminal so the prompt still works when stdin is redirected.
  read -r _ < /dev/tty
  echo ""
fi

echo "Downloading file: $URL"

if ! wget "$URL" -O "$TMP_NAME-download" > /dev/null 2>&1; then
  error "Unable to retrieve file $URL"
fi

# Ask ImageMagick for the image format. A single call both validates the
# download (non-zero status means it is not a readable image) and yields the
# format name.
if ! EXT=$(identify -format '%m\n' "$TMP_NAME-download" 2> /dev/null); then
  error "Unable to identify image type for $URL."
fi
# Multi-frame images produce one line per frame; keep only the first one and
# normalise it to lower case.
EXT=$(printf '%s\n' "$EXT" | awk 'NR == 1 { print tolower($0) }')

echo "Detected image format: $EXT"

status=0
case "$EXT" in
  tif | tiff | bmp)
    # identify reports TIFF files as "TIFF", hence the extra "tiff" pattern.
    # No conversion needed: OCR the downloaded file itself.
    ocr_input="$TMP_NAME-download"
    ;;
  *)
    # Image conversion required
    echo "Converting image"
    ocr_input="$TMP_NAME.tif"
    convert "$TMP_NAME-download" "$ocr_input" > /dev/null 2>&1 || status=1
    ;;
esac

# Only attempt OCR when the input file was prepared successfully.
if [ "$status" -eq 0 ]; then
  tesseract "$ocr_input" "$OUTPUT" > /dev/null 2>&1 || status=1
fi

echo "Cleaning up..."
cleanup

# Report on the conversion/OCR result captured above, not on the cleanup.
if [ "$status" -eq 0 ]; then
  echo "Conversion of $URL completed successfully!"
  echo "Text has been saved to: $OUTPUT.txt"
else
  echo "Conversion of $URL failed. " >&2
  exit 1
fi




(责任编辑:IT)