etag in CLI

2016-01-28 23:46:21 +01:00 · 2016-01-28 23:46:21 +01:00 · d9e98f704a
parent e73af72e67
commit d9e98f704a
1 changed files with 71 additions and 43 deletions
--- a/cli/paperfind.sh
+++ b/cli/paperfind.sh
@ -23,18 +23,26 @@
 #   Result (WSN):
 #     OUTPUT   = "[" [ DOC_LIST ] "]" .
 #     DOC_LIST = DOC { "," DOC } .
-#     DOC      = "{""folder"":" FOLDER ",""labels"":[" [ LABELS ] "],""count"":" COUNT ",""type"":" TYPE "}" .
+#     DOC      = "{""folder"":" FOLDER ",""labels"":[" [ LABELS ]
+#                "],""count"":" COUNT ",""type"":" TYPE ",""etag"":" ETAG "}" .
 #     FOLDER   = json_string .
 #     LABELS   = json_string { "," json_string } .
 #     COUNT    = json_number .
 #     TYPE     = """pdf""" | """pages""" .
+#     ETAG     = json_string .
+#
+# Retrieve a single document's metadata:
+#   -M <date> : the folder-name of the document
+#   Result (WSN):
+#     OUTPUT   = DOC .
 #
 # Retrieve a document's thumbnails:
 #   -T <date> : the folder-name of the document
 #   Result (WSN):
 #     OUTPUT   = "[" THUMBS "]" .
 #     THUMBS   = CONTENTS { "," CONTENTS } .
-#     CONTENTS = "{""mime"":" MIME ",""data"":" B64_DATA ",""width"":" WIDTH ",""height"":" HEIGHT "}" .
+#     CONTENTS = "{""mime"":" MIME ",""width"":" WIDTH ",""height"":"
+#                HEIGHT ",""etag"":" ETAG ",""data"":" B64_DATA "}" .
 #     MIME     = json_string .
 #     B64_DATA = json_string .
 #     WIDTH    = json_number .
@ -48,10 +56,11 @@
 #   document is a PDF file: then the whole PDF file is encoded.
 #   Width and height should be ignored for PDF contents.
 #
-# Retrieve a document's page's metadata without the actual page:
+# Retrieve a document page's metadata without the actual page:
 #   -M <date> -p <page number>
 #   Result (WSN):
-#     OUTPUT   = "{""mime"":" MIME ",""width"":" WIDTH ",""height"":" HEIGHT "}" .
+#     OUTPUT   = "{""mime"":" MIME ",""width"":" WIDTH ",""height"":"
+#                HEIGHT ",""etag"":" ETAG "}" .
 #   Width and height should be ignored for PDF contents.
 #
 # Retrieve a raw document's page/PDF without metadata:
@ -101,29 +110,70 @@ Q) for ((i=${#dates[*]}-1;i>=0;i--)); do [ ${#dates[i]} -ge 4 ] || unset dates[i
 T) [ -n "$doc" -a -d "$BASE/$doc" ] || exit 3 ;;
 D|M|R)
   [ -n "$doc" -a -d "$BASE/$doc" ] || exit 3
-   if [ -f "$BASE/$doc/doc.pdf" ]; then
-     if [ -n "$pdfasjpg" ]; then
-       maxp=$(pdfinfo "$BASE/$doc/doc.pdf" | awk '/^Pages:/{print $2}')
-       [ -n "$maxp" -a -n "$page" -a $page -gt 0 -a $page -le $maxp ] || exit 3
+   if [ $mode != M -o -n "$page" ]; then  # -M without a page asks for whole-document metadata: no page to validate
+     if [ -f "$BASE/$doc/doc.pdf" ]; then
+       if [ -n "$pdfasjpg" ]; then  # PDF pages rendered as JPEG: the page must lie within the PDF's page count
+         maxp=$(pdfinfo "$BASE/$doc/doc.pdf" | awk '/^Pages:/{print $2}')
+         [ -n "$maxp" -a -n "$page" -a $page -gt 0 -a $page -le $maxp ] || exit 3
+       fi
+     else
+       [ -f "$BASE/$doc/paper.$page.jpg" ] || exit 3  # non-PDF document: the page's image file must exist
     fi
-   else
-     [ -f "$BASE/$doc/paper.$page.jpg" ] || exit 3
   fi ;;
 *) exit 1 ;;
 esac

 # RUN

+# &0: image data (identified by file(1) reading stdin)
+# &1: "<width> <height>", taken from the second-to-last comma-separated "<W>x<H>" field of file's description
+function image_wh() {
+  file -b - | sed -r 's/.*, ([0-9]+)x([0-9]+),[^,]*$/\1 \2/'
+}
+
 # &0: raw string
 # &1: json string
 function json_string() {
  printf '"%s"' "$(sed 's#[\\/"]#\\&#g;s#\t#\\t#g')"
 }

-# &0: image data
-# &1: "<width> <height>"
-function image_wh() {
-  file -b - | sed -r 's/.*, ([0-9]+)x([0-9]+),[^,]*$/\1 \2/'
+# $1: folder name (relative path)
+# &1: json DOC (labels: first comma-separated field of each line of $1/labels, sorted; etag: folder mtime via find %T@)
+function json_doc() {
+  local type count labs lab nil etag
+  [ -f "$1/doc.pdf" ] && type=pdf || type=pages
+  count=$(/bin/ls -1 "$1"/paper.*.thumb.jpg 2>/dev/null | wc -l)  # number of page thumbnails (0 if none)
+  labs="$(
+    while IFS=, read -r lab nil; do printf ','; json_string <<<"$lab"; done < <(sort -df "$1/labels"))"  # every label gets a leading ','; ${labs:1} below drops the first one
+  etag=$(find "$1" -maxdepth 0 -printf '%T@')  # -maxdepth 0: only the folder itself, not its contents
+  printf '{"folder":%s,"labels":[%s],"count":%d,"type":"%s","etag":%s}' \
+    "$(json_string <<<"$1")" "${labs:1}" "$count" "$type" "$(json_string <<<"$etag")"
+}
+
+# $1: file path (image or PDF)
+#[$2: page number (if it must be extracted from a PDF); only used when $pdfasjpg is set]
+#[$3: "nodata": any non-empty value omits the "data" member]
+# &1: json CONTENTS (etag: the file's mtime via find %T@; width/height stay 0 for a PDF that is not rendered)
+function json_contents() {
+  local mime w=0 h=0 etag
+  local -a cmd=(cat "$1")  # command producing the raw bytes for "data"; replaced below for rendered PDF pages
+  mime=$(file -bi "$1" | cut -d';' -f1)
+  if [ "$mime" != 'application/pdf' ]; then
+    read -r w h < <(image_wh <"$1")
+  elif [ -n "$2" -a -n "$pdfasjpg" ]; then
+    cmd=(pdftoppm -r "$PDF_DPI" -jpeg -f "$2" -l "$2" "$1")
+    mime=image/jpeg
+    read -r w h < <("${cmd[@]}" | image_wh)
+  fi
+  etag=$(find "$1" -maxdepth 0 -printf '%T@')
+  if [ -n "$3" ]; then
+    printf '{"mime":%s,"width":%d,"height":%d,"etag":%s}' \
+      "$(json_string <<<"$mime")" "$w" "$h" "$(json_string <<<"$etag")"
+  else
+    printf '{"mime":%s,"width":%d,"height":%d,"etag":%s,"data":%s}' \
+      "$(json_string <<<"$mime")" "$w" "$h" "$(json_string <<<"$etag")" \
+      "$("${cmd[@]}" | base64 --wrap=0 | json_string)"
+  fi
 }

 cd "$BASE"
@ -167,12 +217,7 @@ Q)
  fi
  printf '['
  while IFS=/ read folder nil; do if [ -n "$folder" ]; then
-    [ -f $folder/doc.pdf ] && type=pdf || type=pages
-    count=$(/bin/ls -1 $folder/paper.*.thumb.jpg 2>/dev/null | wc -l)
-    labs="$(
-      while IFS=, read lab nil; do printf ','; json_string <<<"$lab"; done < <(sort -df $folder/labels))"
-    printf ',{"folder":%s,"labels":[%s],"count":%d,"type":"%s"}' \
-      "$(json_string <<<"$folder")" "${labs:1}" $count "$type"
+    printf ',%s' "$(json_doc $folder)"
  fi; done < <(sort -r <<<"$found") | sed 's/^.//'
  printf ']'
  ;;
@ -180,34 +225,17 @@ T)
  printf '['
  /bin/ls -1 $doc/paper.*.thumb.jpg | sort -t. -k2,2n \
  | while read t; do
-    read w h < <(image_wh <$t)
-    printf ',{"mime":"image\/jpeg","data":%s,"width":%d,"height":%d}' \
-      "$(base64 --wrap=0 "$t" | json_string)" $w $h
+    printf ',%s' "$(json_contents $t)"
  done | sed 's/^.//'
  printf ']'
  ;;
 D|M)
-  if [ -f $doc/doc.pdf ]; then
-    p=$doc/doc.pdf
-    if [ -n "$pdfasjpg" ]; then
-      read w h < <(pdftoppm -r $PDF_DPI -jpeg -f $page -l $page $p | image_wh)
-      mime='image/jpeg'
-      cmd=(pdftoppm -r $PDF_DPI -jpeg -f $page -l $page $p)
-    else
-      w=0; h=0; mime='application/pdf'
-      cmd=(cat $p)
-    fi
+  if [[ "$mode" == M && -z "$page" ]]; then  # only -M without a page yields DOC metadata; -D on a plain PDF still returns the whole file
+    json_doc "$doc"
+  elif [ -f "$doc/doc.pdf" ]; then
+    json_contents "$doc/doc.pdf" "$page" "${mode/D}"  # ${mode/D} is empty for D (include data) and "M" for M (nodata)
   else
-    p=$doc/paper.$page.jpg
-    read w h < <(image_wh <$p)
-    mime='image/jpeg'
-    cmd=(cat $p)
-  fi
-  if [ $mode == D ]; then
-    printf '{"mime":%s,"data":%s,"width":%d,"height":%d}' \
-      "$(json_string <<<"$mime")" "$("${cmd[@]}" | base64 --wrap=0 | json_string)" $w $h
-  else
-    printf '{"mime":%s,"width":%d,"height":%d}' "$(json_string <<<"$mime")" $w $h
+    json_contents "$doc/paper.$page.jpg" '' "${mode/D}"
   fi
   ;;
 R)