mirror of
https://github.com/kevinveenbirkenbach/bill-manager.git
synced 2024-11-24 18:31:04 +01:00
Implemented update option
This commit is contained in:
parent
c91929f28e
commit
4b0d6ed9ea
43
generate.sh
43
generate.sh
@@ -1,18 +1,39 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
# @see https://faceted.wordpress.com/2010/07/11/how-to-extract-text-from-pdf-files-using-poppler-and-gocr-on-ubuntu/
|
# @author Kevin Veen-Birkenbach
|
||||||
# @param $1 Working directory
|
# @param $1 Working directory
|
||||||
# @param $2 language
|
# @param $2 language
|
||||||
|
# @param $3 mode (update,initialize)
|
||||||
# sudo pacman -Syyu tesseract-data-deu tesseract-data-en tesseract
|
# sudo pacman -Syyu tesseract-data-deu tesseract-data-en tesseract
|
||||||
if [ -z "$2" ]
|
if [ -z "$2" ]
|
||||||
then
|
then
|
||||||
echo "You need to define an working directory" && exit 1;
|
echo "You need to define an working directory and a language" && exit 1;
|
||||||
|
fi
|
||||||
|
if [ -z "$3" ]
|
||||||
|
then
|
||||||
|
MODE="initialize"
|
||||||
|
else
|
||||||
|
if [ "$3" != "update" ]
|
||||||
|
then
|
||||||
|
echo "Unknown option: $3" && exit 1
|
||||||
|
fi
|
||||||
|
MODE="$3"
|
||||||
fi
|
fi
|
||||||
TMP_FOLDER="$(mktemp -d)/" &&
|
TMP_FOLDER="$(mktemp -d)/" &&
|
||||||
ORIGIN_FOLDER="$1/origin/" &&
|
ORIGIN_FOLDER="$1/origin/" &&
|
||||||
OUTPUT_FOLDER="$1/generated/" &&
|
OUTPUT_FOLDER="$1/generated/" || exit 1
|
||||||
echo "Cleaning up $OUTPUT_FOLDER..." &&
|
if [ "$MODE" = "update" ]; then
|
||||||
rm -v "$OUTPUT_FOLDER"* || exit 1;
|
echo "Updating bills..."
|
||||||
|
else
|
||||||
|
if [ "$(ls -A "$TMP_FOLDER")" ]
|
||||||
|
then
|
||||||
|
echo "Cleaning up $OUTPUT_FOLDER..."
|
||||||
|
rm -v "$OUTPUT_FOLDER"* || exit 1;
|
||||||
|
else
|
||||||
|
echo "$OUTPUT_FOLDER is allready cleaned up!"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
for origin_file in "$ORIGIN_FOLDER"*.*; do
|
for origin_file in "$ORIGIN_FOLDER"*.*; do
|
||||||
|
if [ "$MODE" = "update" ] && [ "$(test -f "$OUTPUT_FOLDER$(basename "$origin_file")"*)" ] || [ "$MODE" = "initialize" ]; then
|
||||||
if [ "$(head -c 4 "$origin_file")" = "%PDF" ]; then
|
if [ "$(head -c 4 "$origin_file")" = "%PDF" ]; then
|
||||||
tmp_file="$TMP_FOLDER$(basename "$origin_file")"
|
tmp_file="$TMP_FOLDER$(basename "$origin_file")"
|
||||||
echo "Generating $tmp_file..."
|
echo "Generating $tmp_file..."
|
||||||
@@ -20,11 +41,19 @@ for origin_file in "$ORIGIN_FOLDER"*.*; do
|
|||||||
else
|
else
|
||||||
cp -v "$origin_file" "$TMP_FOLDER"
|
cp -v "$origin_file" "$TMP_FOLDER"
|
||||||
fi
|
fi
|
||||||
|
else
|
||||||
|
echo "Skipped $origin_file..."
|
||||||
|
fi
|
||||||
done
|
done
|
||||||
for tesseract_input_file in "$TMP_FOLDER"*.*; do
|
if [ "$(ls -A "$TMP_FOLDER")" ]
|
||||||
|
then
|
||||||
|
for tesseract_input_file in "$TMP_FOLDER"*.*; do
|
||||||
txt_output_file_without_suffix="$OUTPUT_FOLDER$(basename "$tesseract_input_file")";
|
txt_output_file_without_suffix="$OUTPUT_FOLDER$(basename "$tesseract_input_file")";
|
||||||
echo "Generating $txt_output_file_without_suffix.txt..."
|
echo "Generating $txt_output_file_without_suffix.txt..."
|
||||||
tesseract -l "$2" "$tesseract_input_file" "$txt_output_file_without_suffix";
|
tesseract -l "$2" "$tesseract_input_file" "$txt_output_file_without_suffix";
|
||||||
echo "file content:"
|
echo "file content:"
|
||||||
cat "$txt_output_file_without_suffix.txt"
|
cat "$txt_output_file_without_suffix.txt"
|
||||||
done
|
done
|
||||||
|
else
|
||||||
|
echo "Skipped text generation because $TMP_FOLDER is empty..."
|
||||||
|
fi
|
||||||
|
Loading…
Reference in New Issue
Block a user