etag in CLI

master
tYYGH 2016-01-28 23:46:21 +01:00
parent e73af72e67
commit d9e98f704a
1 changed files with 71 additions and 43 deletions

View File

@ -23,18 +23,26 @@
# Result (WSN): # Result (WSN):
# OUTPUT = "[" [ DOC_LIST ] "]" . # OUTPUT = "[" [ DOC_LIST ] "]" .
# DOC_LIST = DOC { "," DOC } . # DOC_LIST = DOC { "," DOC } .
# DOC = "{""folder"":" FOLDER ",""labels"":[" [ LABELS ] "],""count"":" COUNT ",""type"":" TYPE "}" . # DOC = "{""folder"":" FOLDER ",""labels"":[" [ LABELS ]
# "],""count"":" COUNT ",""type"":" TYPE ",""etag"":" ETAG "}" .
# FOLDER = json_string . # FOLDER = json_string .
# LABELS = json_string { "," json_string } . # LABELS = json_string { "," json_string } .
# COUNT = json_number . # COUNT = json_number .
# TYPE = """pdf""" | """pages""" . # TYPE = """pdf""" | """pages""" .
# ETAG = json_string .
#
# Retrieve a single document's metadata:
# -M <date>
# Result (WSN):
# OUTPUT = DOC .
# #
# Retrieve a document's thumbnails: # Retrieve a document's thumbnails:
# -T <date> : the folder-name of the document # -T <date> : the folder-name of the document
# Result (WSN): # Result (WSN):
# OUTPUT = "[" THUMBS "]" . # OUTPUT = "[" THUMBS "]" .
# THUMBS = CONTENTS { "," CONTENTS } . # THUMBS = CONTENTS { "," CONTENTS } .
# CONTENTS = "{""mime"":" MIME ",""data"":" B64_DATA ",""width"":" WIDTH ",""height"":" HEIGHT "}" . # CONTENTS = "{""mime"":" MIME ",""data"":" B64_DATA ",""width"":"
# WIDTH ",""height"":" HEIGHT ",""etag"":" ETAG "}" .
# MIME = json_string . # MIME = json_string .
# B64_DATA = json_string . # B64_DATA = json_string .
# WIDTH = json_number . # WIDTH = json_number .
@ -48,10 +56,11 @@
# document is a PDF file: then the whole PDF file is encoded. # document is a PDF file: then the whole PDF file is encoded.
# Width and height should be ignored for PDF contents. # Width and height should be ignored for PDF contents.
# #
# Retrieve a document's page's metadata without the actual page: # Retrieve a document page's metadata without the actual page:
# -M <date> -p <page number> # -M <date> -p <page number>
# Result (WSN): # Result (WSN):
# OUTPUT = "{""mime"":" MIME ",""width"":" WIDTH ",""height"":" HEIGHT "}" . # OUTPUT = "{""mime"":" MIME ",""width"":" WIDTH ",""height"":"
# HEIGHT ",""etag"":" ETAG "}" .
# Width and height should be ignored for PDF contents. # Width and height should be ignored for PDF contents.
# #
# Retrieve a raw document's page/PDF without metadata: # Retrieve a raw document's page/PDF without metadata:
@ -101,29 +110,70 @@ Q) for ((i=${#dates[*]}-1;i>=0;i--)); do [ ${#dates[i]} -ge 4 ] || unset dates[i
T) [ -n "$doc" -a -d "$BASE/$doc" ] || exit 3 ;; T) [ -n "$doc" -a -d "$BASE/$doc" ] || exit 3 ;;
D|M|R) D|M|R)
[ -n "$doc" -a -d "$BASE/$doc" ] || exit 3 [ -n "$doc" -a -d "$BASE/$doc" ] || exit 3
if [ -f "$BASE/$doc/doc.pdf" ]; then if [ $mode != M -o -n "$page" ]; then
if [ -n "$pdfasjpg" ]; then if [ -f "$BASE/$doc/doc.pdf" ]; then
maxp=$(pdfinfo "$BASE/$doc/doc.pdf" | awk '/^Pages:/{print $2}') if [ -n "$pdfasjpg" ]; then
[ -n "$maxp" -a -n "$page" -a $page -gt 0 -a $page -le $maxp ] || exit 3 maxp=$(pdfinfo "$BASE/$doc/doc.pdf" | awk '/^Pages:/{print $2}')
[ -n "$maxp" -a -n "$page" -a $page -gt 0 -a $page -le $maxp ] || exit 3
fi
else
[ -f "$BASE/$doc/paper.$page.jpg" ] || exit 3
fi fi
else
[ -f "$BASE/$doc/paper.$page.jpg" ] || exit 3
fi ;; fi ;;
*) exit 1 ;; *) exit 1 ;;
esac esac
# RUN # RUN
# Print the pixel dimensions of the image read from standard input.
# &0: image data
# &1: "<width> <height>"
image_wh() {
  # The dimensions are taken from the "<W>x<H>" field of file(1)'s brief
  # description that is followed by exactly one more comma-separated field.
  # A description without such a field is passed through by sed unchanged.
  local dims_expr='s/.*, ([0-9]+)x([0-9]+),[^,]*$/\1 \2/'
  file -b - | sed -r "$dims_expr"
}
# &0: raw string # &0: raw string
# &1: json string # &1: json string
function json_string() { function json_string() {
printf '"%s"' "$(sed 's#[\\/"]#\\&#g;s#\t#\\t#g')" printf '"%s"' "$(sed 's#[\\/"]#\\&#g;s#\t#\\t#g')"
} }
# &0: image data # $1: folder name (relative path)
# &1: "<width> <height>" # &1: json DOC
function image_wh() { function json_doc() {
file -b - | sed -r 's/.*, ([0-9]+)x([0-9]+),[^,]*$/\1 \2/' local type count labs lab nil etag
[ -f $1/doc.pdf ] && type=pdf || type=pages
count=$(/bin/ls -1 $1/paper.*.thumb.jpg 2>/dev/null | wc -l)
labs="$(
while IFS=, read lab nil; do printf ','; json_string <<<"$lab"; done < <(sort -df $1/labels))"
etag=$(find $1 -maxdepth 0 -printf '%T@')
printf '{"folder":%s,"labels":[%s],"count":%d,"type":"%s","etag":%s}' \
"$(json_string <<<"$1")" "${labs:1}" $count "$type" "$(json_string <<<"$etag")"
}
# Print the JSON CONTENTS object describing one file: a page image, a
# thumbnail, a whole PDF, or a single PDF page rendered as JPEG.
# Reads the globals $pdfasjpg (non-empty: render the requested PDF page as
# JPEG) and $PDF_DPI (resolution handed to pdftoppm).
#  $1: file path
# [$2: page number (if it must be extracted from a PDF)]
# [$3: "nodata" (any non-empty value omits the "data" member)]
#  &1: json CONTENTS
function json_contents() {
  local mime w=0 h=0 etag
  # Command producing the payload; replaced below when a PDF page is rendered.
  local -a cmd=(cat "$1")
  mime=$(file -bi "$1" | cut -d';' -f1)
  if [ "$mime" != 'application/pdf' ]; then
    read -r w h < <(image_wh <"$1")
  elif [ -n "$2" ] && [ -n "$pdfasjpg" ]; then
    cmd=(pdftoppm -r "$PDF_DPI" -jpeg -f "$2" -l "$2" "$1")
    mime=image/jpeg
    read -r w h < <("${cmd[@]}" | image_wh)
  fi
  # image_wh hands back file(1)'s description verbatim when it contains no
  # "<W>x<H>" field; fall back to 0 so that printf %d always gets an integer
  # and the emitted object stays valid JSON.
  [[ $w =~ ^[0-9]+$ ]] || w=0
  [[ $h =~ ^[0-9]+$ ]] || h=0
  # The etag is the modification time of the file itself (same find
  # invocation shape as json_doc uses for the folder).
  etag=$(find "$1" -maxdepth 0 -printf '%T@')
  if [ -n "$3" ]; then
    printf '{"mime":%s,"width":%d,"height":%d,"etag":%s}' \
      "$(json_string <<<"$mime")" "$w" "$h" "$(json_string <<<"$etag")"
  else
    printf '{"mime":%s,"width":%d,"height":%d,"etag":%s,"data":%s}' \
      "$(json_string <<<"$mime")" "$w" "$h" "$(json_string <<<"$etag")" \
      "$("${cmd[@]}" | base64 --wrap=0 | json_string)"
  fi
}
cd "$BASE" cd "$BASE"
@ -167,12 +217,7 @@ Q)
fi fi
printf '[' printf '['
while IFS=/ read folder nil; do if [ -n "$folder" ]; then while IFS=/ read folder nil; do if [ -n "$folder" ]; then
[ -f $folder/doc.pdf ] && type=pdf || type=pages printf ',%s' "$(json_doc $folder)"
count=$(/bin/ls -1 $folder/paper.*.thumb.jpg 2>/dev/null | wc -l)
labs="$(
while IFS=, read lab nil; do printf ','; json_string <<<"$lab"; done < <(sort -df $folder/labels))"
printf ',{"folder":%s,"labels":[%s],"count":%d,"type":"%s"}' \
"$(json_string <<<"$folder")" "${labs:1}" $count "$type"
fi; done < <(sort -r <<<"$found") | sed 's/^.//' fi; done < <(sort -r <<<"$found") | sed 's/^.//'
printf ']' printf ']'
;; ;;
@ -180,34 +225,17 @@ T)
printf '[' printf '['
/bin/ls -1 $doc/paper.*.thumb.jpg | sort -t. -k2,2n \ /bin/ls -1 $doc/paper.*.thumb.jpg | sort -t. -k2,2n \
| while read t; do | while read t; do
read w h < <(image_wh <$t) printf ',%s' "$(json_contents $t)"
printf ',{"mime":"image\/jpeg","data":%s,"width":%d,"height":%d}' \
"$(base64 --wrap=0 "$t" | json_string)" $w $h
done | sed 's/^.//' done | sed 's/^.//'
printf ']' printf ']'
;; ;;
D|M) D|M)
if [ -f $doc/doc.pdf ]; then if [ -z "$page" ]; then
p=$doc/doc.pdf json_doc $doc
if [ -n "$pdfasjpg" ]; then elif [ -f $doc/doc.pdf ]; then
read w h < <(pdftoppm -r $PDF_DPI -jpeg -f $page -l $page $p | image_wh) json_contents $doc/doc.pdf $page ${mode/D}
mime='image/jpeg'
cmd=(pdftoppm -r $PDF_DPI -jpeg -f $page -l $page $p)
else
w=0; h=0; mime='application/pdf'
cmd=(cat $p)
fi
else else
p=$doc/paper.$page.jpg json_contents $doc/paper.$page.jpg '' ${mode/D}
read w h < <(image_wh <$p)
mime='image/jpeg'
cmd=(cat $p)
fi
if [ $mode == D ]; then
printf '{"mime":%s,"data":%s,"width":%d,"height":%d}' \
"$(json_string <<<"$mime")" "$("${cmd[@]}" | base64 --wrap=0 | json_string)" $w $h
else
printf '{"mime":%s,"width":%d,"height":%d}' "$(json_string <<<"$mime")" $w $h
fi fi
;; ;;
R) R)