mirror of
https://github.com/kevinveenbirkenbach/bill-manager.git
synced 2024-11-22 01:11:04 +01:00
Implemented logic for files which don't need OCR
This commit is contained in:
parent
c5aa6142ee
commit
f58c83fc3f
16
generate.sh
16
generate.sh
@@ -24,7 +24,7 @@ OUTPUT_FOLDER="$1/generated/" || exit 1
|
|||||||
if [ "$MODE" = "update" ]; then
|
if [ "$MODE" = "update" ]; then
|
||||||
echo "Updating bills..."
|
echo "Updating bills..."
|
||||||
else
|
else
|
||||||
if [ "$(ls -A "$TMP_FOLDER")" ]
|
if [ "$(ls -A "$OUTPUT_FOLDER")" ]
|
||||||
then
|
then
|
||||||
echo "Cleaning up $OUTPUT_FOLDER..."
|
echo "Cleaning up $OUTPUT_FOLDER..."
|
||||||
rm -v "$OUTPUT_FOLDER"* || exit 1;
|
rm -v "$OUTPUT_FOLDER"* || exit 1;
|
||||||
@@ -36,8 +36,17 @@ for origin_file in "$ORIGIN_FOLDER"*.*; do
|
|||||||
if [ "$MODE" = "update" ] && [ ! -f "$OUTPUT_FOLDER$(basename "$origin_file")"* ] || [ "$MODE" = "initialize" ]; then
|
if [ "$MODE" = "update" ] && [ ! -f "$OUTPUT_FOLDER$(basename "$origin_file")"* ] || [ "$MODE" = "initialize" ]; then
|
||||||
if [ "$(head -c 4 "$origin_file")" = "%PDF" ]; then
|
if [ "$(head -c 4 "$origin_file")" = "%PDF" ]; then
|
||||||
tmp_file="$TMP_FOLDER$(basename "$origin_file")"
|
tmp_file="$TMP_FOLDER$(basename "$origin_file")"
|
||||||
echo "Generating $tmp_file..."
|
txt_output_file="$OUTPUT_FOLDER$(basename "$origin_file").txt"
|
||||||
pdfimages "$origin_file" "$tmp_file"
|
pdftotext "$origin_file" "$txt_output_file"
|
||||||
|
content="$(cat "$txt_output_file")"
|
||||||
|
if [ ${#content} -gt "9" ]
|
||||||
|
then
|
||||||
|
echo "Text successfully extracted to $txt_output_file:"
|
||||||
|
cat "$txt_output_file"
|
||||||
|
else
|
||||||
|
echo "Extract images..."
|
||||||
|
pdfimages "$origin_file" "$tmp_file"
|
||||||
|
fi
|
||||||
else
|
else
|
||||||
cp -v "$origin_file" "$TMP_FOLDER"
|
cp -v "$origin_file" "$TMP_FOLDER"
|
||||||
fi
|
fi
|
||||||
@@ -57,3 +66,4 @@ if [ "$(ls -A "$TMP_FOLDER")" ]
|
|||||||
else
|
else
|
||||||
echo "Skipped text generation because $TMP_FOLDER is empty..."
|
echo "Skipped text generation because $TMP_FOLDER is empty..."
|
||||||
fi
|
fi
|
||||||
|
echo "Cleanup..." && rm -v "$TMP_FOLDER"* && rmdir -v "$TMP_FOLDER";
|
||||||
|
Loading…
Reference in New Issue
Block a user