diff --git a/cli/paperfind.sh b/cli/paperfind.sh
index fa2ca4f..b111198 100755
--- a/cli/paperfind.sh
+++ b/cli/paperfind.sh
@@ -13,6 +13,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# -h This help.
+#
# Query:
# -Q [-d ]
# [-l ]
@@ -27,6 +28,7 @@
# LABELS = json_string { "," json_string } .
# COUNT = json_number .
# TYPE = """pdf""" | """pages""" .
+#
# Retrieve a document's thumbnails:
# -T : the folder-name of the document
# Result (WSN):
@@ -37,18 +39,35 @@
# B64_DATA = json_string .
# WIDTH = json_number .
# HEIGHT = json_number .
+#
# Retrieve a document's page/PDF and metadata:
# -D -p
# Result (WSN):
# OUTPUT = CONTENTS .
+# The "data" field contains the wanted page in JPEG format, except if the
+# document is a PDF file: then the whole PDF file is encoded.
# Width and height should be ignored for PDF contents.
+#
+# Retrieve a document's page's metadata without the actual page:
+# -M -p
+# Result (WSN):
+# OUTPUT = "{""mime"":" MIME ",""width"":" WIDTH ",""height"":" HEIGHT "}" .
+# Width and height should be ignored for PDF contents.
+#
# Retrieve a raw document's page/PDF without metadata:
# -R -p
-# Result: file contents.
+# Result: page contents.
+# The returned data is the raw page in JPEG format, except if the document is
+# a PDF file: then the whole PDF file is returned.
+#
+# The behaviour described above changes if the commands "pdfinfo" and "pdftoppm"
+# are both available. In this case, pages from PDF documents are treated the
+# same way as pages from image-based documents.
########## CONFIGURATION ##########
BASE='/PATH/TO/PAPERWORK/BASE/DIRECTORY'
+PDF_DPI=90
##### NO CHANGE PAST THIS LINE #####
@@ -60,16 +79,17 @@ words=()
q_ci=
doc=
page=
+{ type pdfinfo && type pdftoppm; } &>/dev/null && pdfasjpg=true
# READ COMMAND LINE PARAMETERS
-while getopts hQd:l:k:iT:D:p:R: opt; do case "$opt" in
+while getopts hQd:l:k:iT:D:p:M:R: opt; do case "$opt" in
h) sed -n '2,/^$/s/.//p' "$0"; exit 0 ;;
Q) mode=Q ;;
d) IFS='|' read -a dates < <(tr -dc '|[:digit:]' <<<"$OPTARG") ;;
l) IFS='|' read -a labels < <(tr -d ',"[:cntrl:]' <<<"$OPTARG") ;;
k) IFS='|' read -a words < <(tr -d ',"[:cntrl:]' <<<"$OPTARG") ;;
i) q_ci=true ;;
-T|D|R)
+T|D|M|R)
mode=$opt; doc=$(tr -dc '[:digit:]_' <<<"$OPTARG") ;;
p) page=$(tr -dc '[:digit:]' <<<"$OPTARG") ;;
esac; done
@@ -79,17 +99,33 @@ case "$mode" in
Q) for ((i=${#dates[*]}-1;i>=0;i--)); do [ ${#dates[i]} -ge 4 ] || unset dates[i]; done
[ -n "${dates[*]}${labels[*]}${words[*]}" ] || exit 2 ;;
T) [ -n "$doc" -a -d "$BASE/$doc" ] || exit 3 ;;
-D|R)
+D|M|R)
[ -n "$doc" -a -d "$BASE/$doc" ] || exit 3
- [ -f "$BASE/$doc/doc.pdf" -o -f "$BASE/$doc/paper.$page.jpg" ] || exit 3 ;;
+ if [ -f "$BASE/$doc/doc.pdf" ]; then
+ if [ -n "$pdfasjpg" ]; then
+ maxp=$(pdfinfo "$BASE/$doc/doc.pdf" | awk '/^Pages:/{print $2}')
+ [ -n "$maxp" -a -n "$page" -a $page -gt 0 -a $page -le $maxp ] || exit 3
+ fi
+ else
+ [ -f "$BASE/$doc/paper.$page.jpg" ] || exit 3
+ fi ;;
*) exit 1 ;;
esac
# RUN
+
+# &0: raw string
+# &1: json string
function json_string() {
	# Reads raw text on stdin and writes it to stdout wrapped in double quotes.
	# First sed command: prefix every backslash, slash and double quote with a
	# backslash ("&" re-inserts the matched character).
	# Second sed command: replace each tab with the two characters "\t".
	# Other control characters (e.g. newlines) are passed through unchanged.
	printf '"%s"' "$(sed 's#[\\/"]#\\&#g;s#\t#\\t#g')"
}
+# &0: image data
+# &1: "<width> <height>"
+function image_wh() { # stdin: image data; stdout: the sed-rewritten description from file(1)
+file -b - | sed -r 's/.*, ([0-9]+)x([0-9]+),[^,]*$/\1 \2/' # rewrite "..., WxH, <last field>" to "W H"; a non-matching description passes through unchanged
+}
+
cd "$BASE"
case "$mode" in
Q)
@@ -144,27 +180,43 @@ T)
printf '['
/bin/ls -1 $doc/paper.*.thumb.jpg | sort -t. -k2,2n \
| while read t; do
- read w h < <(file -b "$t" | sed -r 's/.*, ([0-9]+)x([0-9]+),[^,]*$/\1 \2/')
+ read w h < <(image_wh <$t)
printf ',{"mime":"image\/jpeg","data":%s,"width":%d,"height":%d}' \
"$(base64 --wrap=0 "$t" | json_string)" $w $h
done | sed 's/^.//'
printf ']'
;;
-D)
+D|M)
if [ -f $doc/doc.pdf ]; then
p=$doc/doc.pdf
- w=0; h=0
+ if [ -n "$pdfasjpg" ]; then
+ read w h < <(pdftoppm -r $PDF_DPI -jpeg -f $page -l $page $p | image_wh)
+ mime='image/jpeg'
+ cmd=(pdftoppm -r $PDF_DPI -jpeg -f $page -l $page $p)
+ else
+ w=0; h=0; mime='application/pdf'
+ cmd=(cat $p)
+ fi
else
p=$doc/paper.$page.jpg
- read w h < <(file -b "$p" | sed -r 's/.*, ([0-9]+)x([0-9]+),[^,]*$/\1 \2/')
+ read w h < <(image_wh <$p)
+ mime='image/jpeg'
+ cmd=(cat $p)
+ fi
+ if [ $mode == D ]; then
+ printf '{"mime":%s,"data":%s,"width":%d,"height":%d}' \
+ "$(json_string <<<"$mime")" "$("${cmd[@]}" | base64 --wrap=0 | json_string)" $w $h
+ else
+ printf '{"mime":%s,"width":%d,"height":%d}' "$(json_string <<<"$mime")" $w $h
fi
- mime=$(file -bi $p | cut -d';' -f1)
- printf '{"mime":%s,"data":%s,"width":%d,"height":%d}' \
- "$(json_string <<<"$mime")" "$(base64 --wrap=0 "$p" | json_string)" $w $h
;;
R)
if [ -f $doc/doc.pdf ]; then
- cat $doc/doc.pdf
+ if [ -n "$pdfasjpg" ]; then
+ pdftoppm -r $PDF_DPI -jpeg -f $page -l $page $doc/doc.pdf
+ else
+ cat $doc/doc.pdf
+ fi
else
cat $doc/paper.$page.jpg
fi
diff --git a/web/paperweb.php b/web/paperweb.php
index cc1a82f..c25ebb9 100644
--- a/web/paperweb.php
+++ b/web/paperweb.php
@@ -20,11 +20,16 @@ $USER='USER THAT SUDO WILL RUN paperfind.sh AS';
##### NO CHANGE PAST THIS LINE #####
if (array_key_exists('doDownload', $_REQUEST)) {
- $mime = (@$_REQUEST['type'] == 'pdf' ? 'application/pdf' : 'image/jpeg');
$date = escapeshellarg(@$_REQUEST['date']);
$page = escapeshellarg(@$_REQUEST['page']);
- header("Content-Type: {$mime}");
- passthru("sudo -u {$USER} {$PATH} -R {$date} -p {$page}");
+
+ # -M and -R are used instead of -D to avoid storing the data in RAM
+ $json = exec("sudo -u {$USER} {$PATH} -M {$date} -p {$page}");
+ if ($json) {
+ $meta = json_decode($json, true);
+ header("Content-Type: {$meta['mime']}");
+ passthru("sudo -u {$USER} {$PATH} -R {$date} -p {$page}");
+ }
} else {
?>
@@ -73,10 +78,9 @@ if (array_key_exists('doDownload', $_REQUEST)) {
-