(相关资料图)
测试环境
Python 3.6
Win10
代码实现
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
"""Structural comparison of two XML documents using xml.etree.ElementTree.

Every comparison function returns a list of human-readable difference
messages; an empty list means no difference was found.
"""

__author__ = "shouke"

import xml.etree.ElementTree as ET

# Placeholder printed when an attribute exists on only one of the two nodes.
_MISSING = "不存在"


def compare_xml_node_attributes(xml_node1, xml_node2):
    """Compare the attributes of two elements.

    Neither element is modified.

    :param xml_node1: element taken from the first document
    :param xml_node2: element taken from the second document
    :return: list of messages, one per attribute that is missing on one
        side or whose value differs; empty when the attributes match
    """
    template = "结点1属性:{attrib1} 值:{value1},结点2属性:{attrib2} 值:{value2}"
    attributes1 = xml_node1.attrib
    attributes2 = xml_node2.attrib

    result = []
    for name, value1 in attributes1.items():
        # Membership test (not truthiness) so that an empty attribute value
        # is not mistaken for a missing attribute.
        if name not in attributes2:
            result.append(template.format(
                attrib1=name, value1=value1,
                attrib2=_MISSING, value2=_MISSING))
        elif attributes2[name] != value1:
            result.append(template.format(
                attrib1=name, value1=value1,
                attrib2=name, value2=attributes2[name]))

    # Attributes that only the second node carries.
    for name, value2 in attributes2.items():
        if name not in attributes1:
            result.append(template.format(
                attrib1=_MISSING, value1=_MISSING,
                attrib2=name, value2=value2))
    return result


def _group_children_by_tag(xml_node, node_xpath):
    """Map each child tag to its children, in document order, with xpaths."""
    grouped = {}
    for child in xml_node:
        siblings = grouped.setdefault(child.tag, [])
        # XPath positions are 1-based and counted per tag.
        xpath = "%s/%s[%s]" % (node_xpath, child.tag, len(siblings) + 1)
        siblings.append({"node": child, "xpath": xpath})
    return grouped


def compare_xml_node_children(xml_node1, xml_node2, node1_xpath, node2_xpath):
    """Compare the direct children of two elements, recursing into pairs.

    Children are grouped by tag; within a tag the i-th child of the first
    node is compared with the i-th child of the second node. Children left
    without a partner on either side are reported as extra or missing.

    :param xml_node1: element taken from the first document
    :param xml_node2: element taken from the second document
    :param node1_xpath: xpath of ``xml_node1``, used in the messages
    :param node2_xpath: xpath of ``xml_node2``, used in the messages
    :return: list of difference messages for the whole subtree
    """
    result = []
    children_of_node1 = _group_children_by_tag(xml_node1, node1_xpath)
    children_of_node2 = _group_children_by_tag(xml_node2, node2_xpath)

    only_in_xml1 = []
    only_in_xml2 = []
    for child_tag, first_children in children_of_node1.items():
        second_children = children_of_node2.get(child_tag, [])
        for first_child, second_child in zip(first_children, second_children):
            result.extend(compare_xml_nodes(
                first_child["node"], second_child["node"],
                first_child["xpath"], second_child["xpath"]))
        # zip() stops at the shorter list; whatever is left over on either
        # side has no counterpart and must be reported.
        only_in_xml1.extend(
            child["xpath"] for child in first_children[len(second_children):])
        only_in_xml2.extend(
            child["xpath"] for child in second_children[len(first_children):])

    # Tags that occur only under the second node.
    for child_tag, second_children in children_of_node2.items():
        if child_tag not in children_of_node1:
            only_in_xml2.extend(child["xpath"] for child in second_children)

    if only_in_xml1:
        result.append(
            "子结点不一样:xml1结点(xpath:{xpath1})比xml2结点(xpath:{xpath2})多了以下子结点:\n{differences}".format(
                xpath1=node1_xpath, xpath2=node2_xpath,
                differences="\n".join(only_in_xml1)))
    if only_in_xml2:
        result.append(
            "子结点不一样:xml1结点(xpath:{xpath1})比xml2结点(xpath:{xpath2})少了以下子结点:\n{differences}".format(
                xpath1=node1_xpath, xpath2=node2_xpath,
                differences="\n".join(only_in_xml2)))
    return result


def compare_xml_nodes(xml_node1, xml_node2, node1_xpath="", node2_xpath=""):
    """Compare two elements: tag, text, attributes, then children.

    The text comparison is exact (whitespace is significant); element tails
    are not compared.

    :param xml_node1: element taken from the first document
    :param xml_node2: element taken from the second document
    :param node1_xpath: xpath of ``xml_node1``, used in the messages
    :param node2_xpath: xpath of ``xml_node2``, used in the messages
    :return: list of difference messages; empty when the subtrees match
    """
    result = []

    # Compare tags.
    if xml_node1.tag != xml_node2.tag:
        result.append(
            "标签不一样:xml1结点(xpath:{xpath1}):{tag1},xml2结点(xpath:{xpath2}):{tag2}".format(
                xpath1=node1_xpath, tag1=xml_node1.tag,
                xpath2=node2_xpath, tag2=xml_node2.tag))

    # Compare text; a missing text (None) is displayed as an empty string.
    if xml_node1.text != xml_node2.text:
        result.append(
            "文本不一样:xml1结点(xpath:{xpath1}):{text1},xml2结点(xpath:{xpath2}):{text2}".format(
                xpath1=node1_xpath, text1=xml_node1.text or "",
                xpath2=node2_xpath, text2=xml_node2.text or ""))

    # Compare attributes.
    attribute_differences = compare_xml_node_attributes(xml_node1, xml_node2)
    if attribute_differences:
        result.append(
            "属性不一样:xml1结点(xpath:{xpath1}),xml2结点(xpath:{xpath2}):\n{differences}".format(
                xpath1=node1_xpath, xpath2=node2_xpath,
                differences="\n".join(attribute_differences)))

    # Compare children (recursive).
    result.extend(
        compare_xml_node_children(xml_node1, xml_node2, node1_xpath, node2_xpath))
    return result


def compare_xml_strs(xml1_str, xml2_str, mode=3):
    """Parse two XML strings and compare them from the root down.

    :param xml1_str: first XML document as a string
    :param xml2_str: second XML document as a string
    :param mode: comparison mode; reserved and currently unused. Children
        that share a tag are treated as an ordered list and compared in
        order.
    :return: list of difference messages; empty when the documents match
    """
    root1 = ET.fromstring(xml1_str.strip())
    root2 = ET.fromstring(xml2_str.strip())
    return compare_xml_nodes(root1, root2, "/%s" % root1.tag, "/%s" % root2.tag)
测试运行
# NOTE(review): the XML markup of these sample documents was lost in the
# published listing (only the text values survived, which is not parseable
# XML). The documents below are reconstructed from those surviving values
# and from the recorded run output; element/attribute names that appear in
# neither (e.g. "name", the book names) are assumptions -- confirm against
# the original samples.

# xml_str1 and xml_str2 are identical documents.
xml_str1 = """
<data>
    <country name="Liechtenstein">
        <rank>1</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank>4</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank>68</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>
"""

xml_str2 = """
<data>
    <country name="Liechtenstein">
        <rank>1</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank>4</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank>68</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>
"""

# xml_str3 and xml_str4 differ in child elements and in one attribute value.
xml_str3 = """
<data>
    <class>
        <rangk>1</rangk>
        <name>unknow</name>
        <addr>sz</addr>
    </class>
    <class>
        <rangk>2</rangk>
        <name>unknown</name>
        <book name="python" price="10"/>
        <book name="java" price="15"/>
    </class>
    <class>
        <rangk>3</rangk>
        <name>unknown</name>
        <addr>other addr</addr>
    </class>
</data>
"""

xml_str4 = """
<data>
    <class>
        <name>unknow</name>
        <addr>sz</addr>
    </class>
    <class>
        <name>unknown</name>
        <addr>other addr</addr>
        <book name="python" price="10"/>
        <book name="java" price="16"/>
    </class>
</data>
"""

if __name__ == "__main__":
    # Identical documents: no differences expected.
    res_list = compare_xml_strs(xml_str1, xml_str2)
    if res_list:
        print("xml1和xml2不一样:\n%s" % "\n".join(res_list))
    else:
        print("xml1和xml2一样")

    # Documents with structural and attribute differences.
    res_list = compare_xml_strs(xml_str3, xml_str4)
    if res_list:
        print("xml3和xml4不一样:\n%s" % "\n".join(res_list))
    else:
        print("xml3和xml4一样")
运行结果
xml1和xml2一样
xml3和xml4不一样:
子结点不一样:xml1结点(xpath:/data/class[1])比xml2结点(xpath:/data/class[1])多了以下子结点:
/data/class[1]/rangk[1]
属性不一样:xml1结点(xpath:/data/class[2]/book[2]),xml2结点(xpath:/data/class[2]/book[2]):
结点1属性:price 值:15,结点2属性:price 值:16
子结点不一样:xml1结点(xpath:/data/class[2])比xml2结点(xpath:/data/class[2])多了以下子结点:
/data/class[2]/rangk[1]
子结点不一样:xml1结点(xpath:/data/class[2])比xml2结点(xpath:/data/class[2])少了以下子结点:
/data/class[2]/addr[1]