etag in CLI

master
tYYGH 2016-01-28 23:46:21 +01:00
parent e73af72e67
commit d9e98f704a
1 changed files with 71 additions and 43 deletions

View File

@ -23,18 +23,26 @@
# Result (WSN):
# OUTPUT = "[" [ DOC_LIST ] "]" .
# DOC_LIST = DOC { "," DOC } .
# DOC = "{""folder"":" FOLDER ",""labels"":[" [ LABELS ] "],""count"":" COUNT ",""type"":" TYPE "}" .
# DOC = "{""folder"":" FOLDER ",""labels"":[" [ LABELS ]
# "],""count"":" COUNT ",""type"":" TYPE ",""etag"":" ETAG "}" .
# FOLDER = json_string .
# LABELS = json_string { "," json_string } .
# COUNT = json_number .
# TYPE = """pdf""" | """pages""" .
# ETAG = json_string .
#
# Retrieve a single document's metadata:
# -M <date>
# Result (WSN):
# OUTPUT = DOC .
#
# Retrieve a document's thumbnails:
# -T <date> : the folder-name of the document
# Result (WSN):
# OUTPUT = "[" THUMBS "]" .
# THUMBS = CONTENTS { "," CONTENTS } .
# CONTENTS = "{""mime"":" MIME ",""data"":" B64_DATA ",""width"":" WIDTH ",""height"":" HEIGHT "}" .
# CONTENTS = "{""mime"":" MIME ",""data"":" B64_DATA ",""width"":"
# WIDTH ",""height"":" HEIGHT ",""etag"":" ETAG "}" .
# MIME = json_string .
# B64_DATA = json_string .
# WIDTH = json_number .
@ -48,10 +56,11 @@
# document is a PDF file: then the whole PDF file is encoded.
# Width and height should be ignored for PDF contents.
#
# Retrieve a document's page's metadata without the actual page:
# Retrieve a document page's metadata without the actual page:
# -M <date> -p <page number>
# Result (WSN):
# OUTPUT = "{""mime"":" MIME ",""width"":" WIDTH ",""height"":" HEIGHT "}" .
# OUTPUT = "{""mime"":" MIME ",""width"":" WIDTH ",""height"":"
# HEIGHT ",""etag"":" ETAG "}" .
# Width and height should be ignored for PDF contents.
#
# Retrieve a raw document's page/PDF without metadata:
@ -101,29 +110,70 @@ Q) for ((i=${#dates[*]}-1;i>=0;i--)); do [ ${#dates[i]} -ge 4 ] || unset dates[i
T) [ -n "$doc" -a -d "$BASE/$doc" ] || exit 3 ;;
D|M|R)
[ -n "$doc" -a -d "$BASE/$doc" ] || exit 3
if [ -f "$BASE/$doc/doc.pdf" ]; then
if [ -n "$pdfasjpg" ]; then
maxp=$(pdfinfo "$BASE/$doc/doc.pdf" | awk '/^Pages:/{print $2}')
[ -n "$maxp" -a -n "$page" -a $page -gt 0 -a $page -le $maxp ] || exit 3
if [ $mode != M -o -n "$page" ]; then
if [ -f "$BASE/$doc/doc.pdf" ]; then
if [ -n "$pdfasjpg" ]; then
maxp=$(pdfinfo "$BASE/$doc/doc.pdf" | awk '/^Pages:/{print $2}')
[ -n "$maxp" -a -n "$page" -a $page -gt 0 -a $page -le $maxp ] || exit 3
fi
else
[ -f "$BASE/$doc/paper.$page.jpg" ] || exit 3
fi
else
[ -f "$BASE/$doc/paper.$page.jpg" ] || exit 3
fi ;;
*) exit 1 ;;
esac
# RUN
# Report the pixel dimensions of the image read from standard input.
# &0: image data
# &1: "<width> <height>"
#     (if the description printed by file(1) contains no ", <W>x<H>,"
#     field, that description is passed through unchanged)
function image_wh() {
  # Keep only the last "<W>x<H>" field that is followed by exactly one more
  # comma-separated field in the description, rewritten as "<W> <H>".
  local extract_dims='s/.*, ([0-9]+)x([0-9]+),[^,]*$/\1 \2/'
  file -b - | sed -r "$extract_dims"
}
# Encode standard input as a double-quoted JSON string literal.
# Backslash, slash and double quote are backslash-escaped; tab, newline and
# carriage return become \t, \n and \r. Trailing newlines of the input are
# dropped (a here-string always appends one).
# &0: raw string
# &1: json string
function json_string() {
  # Slurp the whole input into the pattern space first, so that embedded
  # newlines can be escaped instead of leaking raw into the JSON output.
  # Backslashes are escaped before the other rules introduce new ones.
  printf '"%s"' "$(sed -e ':a' -e '$!{N;ba' -e '}' \
    -e 's#[\\/"]#\\&#g;s#\t#\\t#g;s#\n#\\n#g;s#\r#\\r#g')"
}
# &0: image data
# &1: "<width> <height>"
function image_wh() {
file -b - | sed -r 's/.*, ([0-9]+)x([0-9]+),[^,]*$/\1 \2/'
# Print the JSON DOC object describing one document folder: its name, its
# labels, its number of page thumbnails, its type and an etag.
# $1: folder name (relative path)
# &1: json DOC
function json_doc() {
  local type=pages count=0 labs='' lab nil etag thumb
  # A folder holding doc.pdf is a PDF document, otherwise a set of pages.
  if [ -f "$1/doc.pdf" ]; then
    type=pdf
  fi
  # Count the thumbnails with a glob instead of parsing ls output; the -e
  # test skips the unexpanded pattern left behind when nothing matches.
  for thumb in "$1"/paper.*.thumb.jpg; do
    if [ -e "$thumb" ]; then
      count=$((count + 1))
    fi
  done
  # Only the first comma-separated field of each labels line is used.
  # Every label is emitted with a leading comma; the first comma is
  # stripped below with ${labs:1}. A missing labels file means no labels.
  if [ -r "$1/labels" ]; then
    labs="$(
      while IFS=, read -r lab nil; do
        printf ','
        json_string <<<"$lab"
      done < <(sort -df -- "$1/labels"))"
  fi
  # The etag is the folder's own modification time as printed by find's
  # %T@ directive; -maxdepth 0 restricts find to the folder itself.
  etag=$(find "$1" -maxdepth 0 -printf '%T@')
  printf '{"folder":%s,"labels":[%s],"count":%d,"type":"%s","etag":%s}' \
    "$(json_string <<<"$1")" "${labs:1}" "$count" "$type" \
    "$(json_string <<<"$etag")"
}
# Print the JSON CONTENTS object for a page image or a PDF file: mime type,
# width, height, etag and, unless $3 is given, the base64-encoded data.
# Reads the variables pdfasjpg and PDF_DPI, which are set outside this
# function.
# $1: file path
#[$2: page number (if it must be extracted from a PDF)]
#[$3: "nodata"] (any non-empty value leaves the "data" member out)
# &1: json CONTENTS
function json_contents() {
  local mime w=0 h=0 etag
  # Command producing the raw bytes to encode; by default the file itself.
  local -a cmd=(cat "$1")
  mime=$(file -bi "$1" | cut -d';' -f1)
  if [ "$mime" != 'application/pdf' ]; then
    read -r w h < <(image_wh <"$1")
  elif [ -n "$2" ] && [ -n "$pdfasjpg" ]; then
    # Render the requested PDF page as a JPEG and describe that instead.
    cmd=(pdftoppm -r "$PDF_DPI" -jpeg -f "$2" -l "$2" "$1")
    mime=image/jpeg
    read -r w h < <("${cmd[@]}" | image_wh)
  fi
  # image_wh passes the file(1) description through when it finds no
  # dimensions; fall back to 0 so printf's %d never sees non-numeric text.
  [[ $w =~ ^[0-9]+$ ]] || w=0
  [[ $h =~ ^[0-9]+$ ]] || h=0
  # The etag is the file's modification time as printed by find's %T@.
  etag=$(find "$1" -maxdepth 0 -printf '%T@')
  if [ -n "$3" ]; then
    printf '{"mime":%s,"width":%d,"height":%d,"etag":%s}' \
      "$(json_string <<<"$mime")" "$w" "$h" "$(json_string <<<"$etag")"
  else
    printf '{"mime":%s,"width":%d,"height":%d,"etag":%s,"data":%s}' \
      "$(json_string <<<"$mime")" "$w" "$h" "$(json_string <<<"$etag")" \
      "$("${cmd[@]}" | base64 --wrap=0 | json_string)"
  fi
}
cd "$BASE"
@ -167,12 +217,7 @@ Q)
fi
printf '['
while IFS=/ read folder nil; do if [ -n "$folder" ]; then
[ -f $folder/doc.pdf ] && type=pdf || type=pages
count=$(/bin/ls -1 $folder/paper.*.thumb.jpg 2>/dev/null | wc -l)
labs="$(
while IFS=, read lab nil; do printf ','; json_string <<<"$lab"; done < <(sort -df $folder/labels))"
printf ',{"folder":%s,"labels":[%s],"count":%d,"type":"%s"}' \
"$(json_string <<<"$folder")" "${labs:1}" $count "$type"
printf ',%s' "$(json_doc $folder)"
fi; done < <(sort -r <<<"$found") | sed 's/^.//'
printf ']'
;;
@ -180,34 +225,17 @@ T)
printf '['
/bin/ls -1 $doc/paper.*.thumb.jpg | sort -t. -k2,2n \
| while read t; do
read w h < <(image_wh <$t)
printf ',{"mime":"image\/jpeg","data":%s,"width":%d,"height":%d}' \
"$(base64 --wrap=0 "$t" | json_string)" $w $h
printf ',%s' "$(json_contents $t)"
done | sed 's/^.//'
printf ']'
;;
D|M)
if [ -f $doc/doc.pdf ]; then
p=$doc/doc.pdf
if [ -n "$pdfasjpg" ]; then
read w h < <(pdftoppm -r $PDF_DPI -jpeg -f $page -l $page $p | image_wh)
mime='image/jpeg'
cmd=(pdftoppm -r $PDF_DPI -jpeg -f $page -l $page $p)
else
w=0; h=0; mime='application/pdf'
cmd=(cat $p)
fi
if [ -z "$page" ]; then
json_doc $doc
elif [ -f $doc/doc.pdf ]; then
json_contents $doc/doc.pdf $page ${mode/D}
else
p=$doc/paper.$page.jpg
read w h < <(image_wh <$p)
mime='image/jpeg'
cmd=(cat $p)
fi
if [ $mode == D ]; then
printf '{"mime":%s,"data":%s,"width":%d,"height":%d}' \
"$(json_string <<<"$mime")" "$("${cmd[@]}" | base64 --wrap=0 | json_string)" $w $h
else
printf '{"mime":%s,"width":%d,"height":%d}' "$(json_string <<<"$mime")" $w $h
json_contents $doc/paper.$page.jpg '' ${mode/D}
fi
;;
R)