file deduplication

This commit is contained in:
Pratik Vyas 2014-03-05 16:59:45 +05:30
parent 5103366eb9
commit e30dfd4ed0
4 changed files with 19 additions and 5 deletions

View file

@ -44,8 +44,8 @@ class FileData(Document):
pass
# if file not attached to any other record, delete it
if self.file_name and not frappe.db.count("File Data",
{"file_name": self.file_name, "name": ["!=", self.name]}):
if self.doc.file_name and not frappe.db.count("File Data",
{"content_hash": self.content_hash, "name": ["!=", self.name]}):
delete_file_data_content(self)
def on_rollback(self):

View file

@ -48,3 +48,5 @@ doc_event:Website Route Permission:on_update = frappe.templates.generators.websi
doc_event:*:on_update = frappe.core.doctype.notification_count.notification_count.clear_doctype_notifications
doc_event:*:on_cancel = frappe.core.doctype.notification_count.notification_count.clear_doctype_notifications
doc_event:*:on_trash = frappe.core.doctype.notification_count.notification_count.clear_doctype_notifications
write_file_keys = file_url

View file

@ -6,6 +6,7 @@ from __future__ import unicode_literals
import frappe
import os
from frappe.utils import get_files_path
from frappe.utils.filemanager import get_content_hash, get_file
def execute():
for name, file_name, file_url in frappe.db.sql(
@ -18,5 +19,7 @@ def execute():
b.doc.file_url = os.path.normpath('/' + old_file_name)
else:
b.doc.file_url = os.path.normpath('/files/' + old_file_name)
_file_name, content = get_file(file_name)
b.doc.content_hash = get_content_hash(content)
b.save()

View file

@ -107,14 +107,17 @@ def save_file(fname, content, dt, dn, decode=False):
method = get_hook_method('write_file', fallback=save_file_on_filesystem)
file_data = method(fname, content, content_type=content_type)
file_data = copy(file_data)
file_data = get_file_data_from_hash(content_hash)
if not file_data:
file_data = method(fname, content, content_type=content_type)
file_data = copy(file_data)
file_data.update({
"doctype": "File Data",
"attached_to_doctype": dt,
"attached_to_name": dn,
"file_size": file_size,
"file_hash": content_hash
"file_hash": content_hash,
"file_name": fname
})
f = frappe.bean(file_data)
@ -125,6 +128,12 @@ def save_file(fname, content, dt, dn, decode=False):
return frappe.doc("File Data", f.doc.duplicate_entry)
return f.doc
def get_file_data_from_hash(content_hash):
    """Look up an existing File Data record that has the given content hash.

    Used to deduplicate uploads: when a record with the same hash already
    exists, the values that the write hook would have produced are copied
    from that record instead of writing the content again.

    :param content_hash: hash string to match against the `content_hash`
        column of `tabFile Data`.
    :return: dict mapping every key listed in the `write_file_keys` hook to
        the value stored on the first matching record, or False when no
        record matches.
    """
    # Pass the hash as a query parameter rather than formatting it into the
    # SQL text, so the database driver takes care of quoting/escaping.
    names = frappe.db.sql_list(
        "select name from `tabFile Data` where content_hash=%s",
        (content_hash,))
    if not names:
        return False

    # Only the first match is used; any further duplicates are ignored.
    existing = frappe.bean('File Data', names[0])
    keys = frappe.get_hooks()['write_file_keys']
    return {key: existing.doc.fields[key] for key in keys}
def save_file_on_filesystem(fname, content, content_type=None):
import filecmp