etag in CLI
parent
e73af72e67
commit
d9e98f704a
102
cli/paperfind.sh
102
cli/paperfind.sh
|
@ -23,18 +23,26 @@
|
|||
# Result (WSN):
|
||||
# OUTPUT = "[" [ DOC_LIST ] "]" .
|
||||
# DOC_LIST = DOC { "," DOC } .
|
||||
# DOC = "{""folder"":" FOLDER ",""labels"":[" [ LABELS ] "],""count"":" COUNT ",""type"":" TYPE "}" .
|
||||
# DOC = "{""folder"":" FOLDER ",""labels"":[" [ LABELS ]
|
||||
# "],""count"":" COUNT ",""type"":" TYPE ",""etag"":" ETAG "}" .
|
||||
# FOLDER = json_string .
|
||||
# LABELS = json_string { "," json_string } .
|
||||
# COUNT = json_number .
|
||||
# TYPE = """pdf""" | """pages""" .
|
||||
# ETAG = json_string .
|
||||
#
|
||||
# Retrieve a single document's metadata:
|
||||
# -M <date>
|
||||
# Result (WSN):
|
||||
# OUTPUT = DOC .
|
||||
#
|
||||
# Retrieve a document's thumbnails:
|
||||
# -T <date> : the folder-name of the document
|
||||
# Result (WSN):
|
||||
# OUTPUT = "[" THUMBS "]" .
|
||||
# THUMBS = CONTENTS { "," CONTENTS } .
|
||||
# CONTENTS = "{""mime"":" MIME ",""data"":" B64_DATA ",""width"":" WIDTH ",""height"":" HEIGHT "}" .
|
||||
# CONTENTS = "{""mime"":" MIME ",""width"":" WIDTH ",""height"":"
|
||||
# HEIGHT ",""etag"":" ETAG ",""data"":" B64_DATA "}" .
|
||||
# MIME = json_string .
|
||||
# B64_DATA = json_string .
|
||||
# WIDTH = json_number .
|
||||
|
@ -48,10 +56,11 @@
|
|||
# document is a PDF file: then the whole PDF file is encoded.
|
||||
# Width and height should be ignored for PDF contents.
|
||||
#
|
||||
# Retrieve a document's page's metadata without the actual page:
|
||||
# Retrieve a document page's metadata without the actual page:
|
||||
# -M <date> -p <page number>
|
||||
# Result (WSN):
|
||||
# OUTPUT = "{""mime"":" MIME ",""width"":" WIDTH ",""height"":" HEIGHT "}" .
|
||||
# OUTPUT = "{""mime"":" MIME ",""width"":" WIDTH ",""height"":"
|
||||
# HEIGHT ",""etag"":" ETAG "}" .
|
||||
# Width and height should be ignored for PDF contents.
|
||||
#
|
||||
# Retrieve a raw document's page/PDF without metadata:
|
||||
|
@ -101,6 +110,7 @@ Q) for ((i=${#dates[*]}-1;i>=0;i--)); do [ ${#dates[i]} -ge 4 ] || unset dates[i
|
|||
T) [ -n "$doc" -a -d "$BASE/$doc" ] || exit 3 ;;
|
||||
D|M|R)
|
||||
[ -n "$doc" -a -d "$BASE/$doc" ] || exit 3
|
||||
if [ $mode != M -o -n "$page" ]; then
|
||||
if [ -f "$BASE/$doc/doc.pdf" ]; then
|
||||
if [ -n "$pdfasjpg" ]; then
|
||||
maxp=$(pdfinfo "$BASE/$doc/doc.pdf" | awk '/^Pages:/{print $2}')
|
||||
|
@ -108,22 +118,62 @@ D|M|R)
|
|||
fi
|
||||
else
|
||||
[ -f "$BASE/$doc/paper.$page.jpg" ] || exit 3
|
||||
fi
|
||||
fi ;;
|
||||
*) exit 1 ;;
|
||||
esac
|
||||
|
||||
# RUN
|
||||
|
||||
# Report the pixel dimensions of an image read from stdin.
# &0: image data
# &1: "<width> <height>", or "0 0" when file(1) reports no "<W>x<H>" field
function image_wh() {
  # The dimensions are expected as "<W>x<H>" in the second-to-last
  # comma-separated field of file(1)'s description.
  # 't' jumps to the end of the script when the substitution matched; otherwise
  # the unrecognised description is replaced, so callers that feed this into
  # `read w h` and printf '%d' always receive two numbers.
  file -b - | sed -r \
    -e 's/.*, ([0-9]+)x([0-9]+),[^,]*$/\1 \2/' \
    -e 't' \
    -e 's/.*/0 0/'
}
|
||||
|
||||
# Encode raw text as a JSON string literal (including the surrounding quotes).
# &0: raw string
# &1: json string
function json_string() {
  # The whole input is first gathered into sed's pattern space so that line
  # breaks inside the value can be escaped; left raw they would make the
  # emitted JSON invalid. Trailing newlines are dropped (a here-string always
  # appends one), matching what the command substitution did before.
  # Backslash, slash and double quote are escaped BEFORE the control
  # characters so the backslashes introduced for \t, \r and \n are not
  # escaped a second time.
  printf '"%s"' "$(sed \
    -e ':a' -e '$!{' -e 'N' -e 'ba' -e '}' \
    -e 's#\n*$##' \
    -e 's#[\\/"]#\\&#g' \
    -e 's#\t#\\t#g' \
    -e 's#\r#\\r#g' \
    -e 's#\n#\\n#g')"
}
|
||||
|
||||
# &0: image data
|
||||
# &1: "<width> <height>"
|
||||
function image_wh() {
|
||||
file -b - | sed -r 's/.*, ([0-9]+)x([0-9]+),[^,]*$/\1 \2/'
|
||||
# Emit the metadata of one document folder as a JSON DOC object.
# The folder is resolved relative to the current working directory.
# $1: folder name (relative path)
# &1: json DOC
function json_doc() {
  local type count=0 labs lab nil etag thumb

  # A folder holding doc.pdf is a PDF document; anything else is a set of pages.
  if [ -f "$1/doc.pdf" ]; then
    type=pdf
  else
    type=pages
  fi

  # Count the thumbnails with a glob instead of parsing ls output; the -e test
  # skips the literal pattern that remains when nothing matches.
  for thumb in "$1"/paper.*.thumb.jpg; do
    if [ -e "$thumb" ]; then
      count=$((count + 1))
    fi
  done

  # Only the first comma-separated field of each line of the labels file is
  # used; every label is emitted with a leading comma, and the first comma is
  # stripped below via ${labs:1}.
  labs="$(
    while IFS=, read -r lab nil; do
      printf ','
      json_string <<<"$lab"
    done < <(sort -df "$1/labels"))"

  # The folder's modification time serves as the etag.
  etag=$(find "$1" -maxdepth 0 -printf '%T@')

  printf '{"folder":%s,"labels":[%s],"count":%d,"type":"%s","etag":%s}' \
    "$(json_string <<<"$1")" "${labs:1}" "$count" "$type" \
    "$(json_string <<<"$etag")"
}
|
||||
|
||||
# Emit the JSON CONTENTS object (mime, width, height, etag and, unless
# suppressed, the base64 encoded data) of one file.
# Reads the globals $pdfasjpg and $PDF_DPI when a PDF page is rendered.
# $1: file path
#[$2: page number (if it must be extracted from a PDF)]
#[$3: "nodata"; any non-empty value omits the "data" member]
# &1: json CONTENTS
function json_contents() {
  local mime w=0 h=0 etag
  # Command that produces the raw bytes to encode; by default the file itself.
  local -a cmd=(cat "$1")

  mime=$(file -bi "$1" | cut -d';' -f1)
  if [ "$mime" != 'application/pdf' ]; then
    read -r w h < <(image_wh <"$1")
  elif [ -n "$2" ] && [ -n "$pdfasjpg" ]; then
    # Render the requested PDF page as a JPEG and report that image instead.
    cmd=(pdftoppm -r "$PDF_DPI" -jpeg -f "$2" -l "$2" "$1")
    mime=image/jpeg
    read -r w h < <("${cmd[@]}" | image_wh)
  fi
  # Otherwise (a PDF delivered as-is) width and height stay 0.

  # The file's modification time serves as the etag.
  etag=$(find "$1" -maxdepth 0 -printf '%T@')

  # ${w:-0}/${h:-0}: read leaves both empty when image_wh printed nothing.
  if [ -n "$3" ]; then
    printf '{"mime":%s,"width":%d,"height":%d,"etag":%s}' \
      "$(json_string <<<"$mime")" "${w:-0}" "${h:-0}" \
      "$(json_string <<<"$etag")"
  else
    printf '{"mime":%s,"width":%d,"height":%d,"etag":%s,"data":%s}' \
      "$(json_string <<<"$mime")" "${w:-0}" "${h:-0}" \
      "$(json_string <<<"$etag")" \
      "$("${cmd[@]}" | base64 --wrap=0 | json_string)"
  fi
}
|
||||
|
||||
cd "$BASE"
|
||||
|
@ -167,12 +217,7 @@ Q)
|
|||
fi
|
||||
printf '['
|
||||
while IFS=/ read folder nil; do if [ -n "$folder" ]; then
|
||||
[ -f $folder/doc.pdf ] && type=pdf || type=pages
|
||||
count=$(/bin/ls -1 $folder/paper.*.thumb.jpg 2>/dev/null | wc -l)
|
||||
labs="$(
|
||||
while IFS=, read lab nil; do printf ','; json_string <<<"$lab"; done < <(sort -df $folder/labels))"
|
||||
printf ',{"folder":%s,"labels":[%s],"count":%d,"type":"%s"}' \
|
||||
"$(json_string <<<"$folder")" "${labs:1}" $count "$type"
|
||||
printf ',%s' "$(json_doc $folder)"
|
||||
fi; done < <(sort -r <<<"$found") | sed 's/^.//'
|
||||
printf ']'
|
||||
;;
|
||||
|
@ -180,34 +225,17 @@ T)
|
|||
printf '['
|
||||
/bin/ls -1 $doc/paper.*.thumb.jpg | sort -t. -k2,2n \
|
||||
| while read t; do
|
||||
read w h < <(image_wh <$t)
|
||||
printf ',{"mime":"image\/jpeg","data":%s,"width":%d,"height":%d}' \
|
||||
"$(base64 --wrap=0 "$t" | json_string)" $w $h
|
||||
printf ',%s' "$(json_contents $t)"
|
||||
done | sed 's/^.//'
|
||||
printf ']'
|
||||
;;
|
||||
D|M)
|
||||
if [ -f $doc/doc.pdf ]; then
|
||||
p=$doc/doc.pdf
|
||||
if [ -n "$pdfasjpg" ]; then
|
||||
read w h < <(pdftoppm -r $PDF_DPI -jpeg -f $page -l $page $p | image_wh)
|
||||
mime='image/jpeg'
|
||||
cmd=(pdftoppm -r $PDF_DPI -jpeg -f $page -l $page $p)
|
||||
if [ -z "$page" ]; then
|
||||
json_doc $doc
|
||||
elif [ -f $doc/doc.pdf ]; then
|
||||
json_contents $doc/doc.pdf $page ${mode/D}
|
||||
else
|
||||
w=0; h=0; mime='application/pdf'
|
||||
cmd=(cat $p)
|
||||
fi
|
||||
else
|
||||
p=$doc/paper.$page.jpg
|
||||
read w h < <(image_wh <$p)
|
||||
mime='image/jpeg'
|
||||
cmd=(cat $p)
|
||||
fi
|
||||
if [ $mode == D ]; then
|
||||
printf '{"mime":%s,"data":%s,"width":%d,"height":%d}' \
|
||||
"$(json_string <<<"$mime")" "$("${cmd[@]}" | base64 --wrap=0 | json_string)" $w $h
|
||||
else
|
||||
printf '{"mime":%s,"width":%d,"height":%d}' "$(json_string <<<"$mime")" $w $h
|
||||
json_contents $doc/paper.$page.jpg '' ${mode/D}
|
||||
fi
|
||||
;;
|
||||
R)
|
||||
|
|
Loading…
Reference in New Issue