I have a xml file in which content looks like this:
xml_content_to_search =
<Document ProviderID="TD" DecimalMarker="comma" Website="https://erc-viewer.sap.com/">
<available_substances><substance ID="0004" DD="14" MM="10" YYYY="2010"><SubName>0004</SubName><url>./UN/0004.xml</url><group>ADR0004_0101</group><group>THP0004Y0101</group><group>THC0004Y0101</group><group>TRP0004Y0101</group><group>TRC0004Y0101</group><group>TIP0004Y0101</group><group>TIC0004Y0101</group><group>CTR0004Y0102</group><group>CRP0004Y0102</group><group>CRC0004Y0102</group></substance><substance ID="ADR0004_0101" DD="26" MM="10" YYYY="2022"><SubName>asa</SubName><url>ADR/ADR0004_0101.xml</url></substance><substance ID="THP0004Y0101" DD="26" MM="10" YYYY="2020"><SubName>asd)</SubName><url>THP/THP0004Y0101.xml</url></substance><substance ID="THC0004Y0101" DD="26" MM="10" YYYY="2020"><SubName>asd</SubName><url>THC/THC0004Y0101.xml</url></substance><substance ID="TRP0004Y0101" DD="26" MM="10" YYYY="2020"><SubName>asd</SubName><url>TRP/TRP0004Y0101.xml</url></substance><substance ID="TRC0004Y0101" DD="26" MM="10" YYYY="2020"><SubName>asd</SubName><url>TRC/TRC0004Y0101.xml</url></substance></available_substances></Document>
I want to search for a specific substance id in xml file and then duplicate it and do some manipulation and I am able to implement it. But after duplicating I want to insert that duplicated element right below the substance id from which it was duplicated.
This is my code:
# Use the os.listdir() method to list all files in the specified folder and filter for XML files
for filename in os.listdir(IAC_files_path):if filename.endswith(".xml"):# Remove the ".xml" extension before adding to the listxml_file_names.append(os.path.splitext(filename)[0])# Parse the XML content to search for <substance> elements with matching IDs
tree = ET.ElementTree(ET.fromstring(xml_content_to_search))
root = tree.getroot()# Initialize a flag to check if at least one match is found
match_found = False# Create a list to store duplicated <substance> elements
duplicated_substance_elements = []# Iterate through the <substance> elements and search for matching IDs
for substance_element in root.findall(".//substance"):substance_id = substance_element.get("ID")print(f"Processing substance_id: {substance_id}")# Check if the ID without the extension is in the listbase_substance_id = os.path.splitext(substance_id)[0]if base_substance_id in xml_file_names:# Print the XML file name found in the <substance> element's ID attributeprint(f"Found XML file name '{substance_id}' in the other XML file.")match_found = True# Create a new <substance> element with modified attributes for IUCduplicate_substance_element_iuc = ET.Element("substance")duplicate_substance_element_iuc.set("ID", base_substance_id.replace("IAC", "IUC"))duplicate_substance_element_iuc.set("DD", substance_element.get("DD"))duplicate_substance_element_iuc.set("MM", substance_element.get("MM"))duplicate_substance_element_iuc.set("YYYY", substance_element.get("YYYY"))# Duplicate and modify the <SubName> element for IUCsubname_element = substance_element.find("SubName")duplicate_subname_element_iuc = ET.Element("SubName")duplicate_subname_element_iuc.text = subname_element.text.replace("IAC", "IUC")duplicate_substance_element_iuc.append(duplicate_subname_element_iuc)# Duplicate and modify the <url> element for IUCurl_element = substance_element.find("url")duplicate_url_element_iuc = ET.Element("url")duplicate_url_element_iuc.text = url_element.text.replace("IAC", "IUC")duplicate_substance_element_iuc.append(duplicate_url_element_iuc)# Insert the duplicated IUC <substance> element immediately after the original IAC elementsubstance_element_index = list(root).index(substance_element)root.insert(substance_element_index + 1, duplicate_substance_element_iuc)# Create a new <substance> element with modified attributes for IECduplicate_substance_element_iec = ET.Element("substance")duplicate_substance_element_iec.set("ID", base_substance_id.replace("IAC", "IEC"))duplicate_substance_element_iec.set("DD", substance_element.get("DD"))duplicate_substance_element_iec.set("MM", substance_element.get("MM"))duplicate_substance_element_iec.set("YYYY", substance_element.get("YYYY"))# Duplicate and modify the <SubName> element for IECduplicate_subname_element_iec = ET.Element("SubName")duplicate_subname_element_iec.text = subname_element.text.replace("IAC", "IEC")duplicate_substance_element_iec.append(duplicate_subname_element_iec)# Duplicate and modify the <url> element for IECduplicate_url_element_iec = ET.Element("url")duplicate_url_element_iec.text = url_element.text.replace("IAC", "IEC")duplicate_substance_element_iec.append(duplicate_url_element_iec)# Insert the duplicated IUC <substance> element immediately after the original IAC elementsubstance_element_index = list(root).index(substance_element)root.insert(substance_element_index + 2, duplicate_substance_element_iec)# Append the duplicated IEC <substance> element to the list#duplicated_substance_elements.append(duplicate_substance_element_iec)# Check if no matches were found and print "Not found" message
if not match_found:print("No XML file names were found in the other XML file.")# # Append the duplicated IEC <substance> elements to the end
# for duplicate_element in duplicated_substance_elements:
# root.append(duplicate_element)# Print the modified XML content
modified_xml_content = ET.tostring(root, encoding="unicode")
print(modified_xml_content)
I am getting this error :
<Element 'substance' at 0x000002BF2DFE8720> is not in list
at this line of code
substance_element_index = list(root).index(substance_element)
My desired output is something like this:
<Document ProviderID="TD" DecimalMarker="comma" Website="https://erc-viewer.sap.com/">
<available_substances><substance ID="0004" DD="14" MM="10" YYYY="2010"><SubName>0004</SubName><url>./UN/0004.xml</url><group>ADR0004_0101</group><group>THP0004Y0101</group><group>THC0004Y0101</group><group>TRP0004Y0101</group><group>TRC0004Y0101</group><group>TIP0004Y0101</group><group>TIC0004Y0101</group><group>CTR0004Y0102</group><group>CRP0004Y0102</group><group>CRC0004Y0102</group></substance><substance ID="ADR0004_0101" DD="26" MM="10" YYYY="2022"><SubName>asa</SubName><url>ADR/ADR0004_0101.xml</url></substance><substance ID="THP0004Y0101" DD="26" MM="10" YYYY="2020"><SubName>asd)</SubName><url>THP/THP0004Y0101.xml</url></substance><substance ID="THC0004Y0101" DD="26" MM="10" YYYY="2020"><SubName>asd</SubName><url>THC/THC0004Y0101.xml</url></substance><substance ID="TRP0004Y0101" DD="26" MM="10" YYYY="2020"><SubName>asd</SubName><url>TRP/TRP0004Y0101.xml</url></substance><substance ID="TRC0004Y0101" DD="26" MM="10" YYYY="2020"><SubName>asd</SubName><url>TRC/TRC0004Y0101.xml</url></substance>**<substance ID="IEC0004Y0101" DD="26" MM="10" YYYY="2020"><SubName>asd</SubName><url>TRC/TRC0004Y0101.xml</url></substance>**</available_substances></Document>
I have a xml file in which content looks like this:
xml_content_to_search =
<Document ProviderID="TD" DecimalMarker="comma" Website="https://erc-viewer.sap.com/">
<available_substances><substance ID="0004" DD="14" MM="10" YYYY="2010"><SubName>0004</SubName><url>./UN/0004.xml</url><group>ADR0004_0101</group><group>THP0004Y0101</group><group>THC0004Y0101</group><group>TRP0004Y0101</group><group>TRC0004Y0101</group><group>TIP0004Y0101</group><group>TIC0004Y0101</group><group>CTR0004Y0102</group><group>CRP0004Y0102</group><group>CRC0004Y0102</group></substance><substance ID="ADR0004_0101" DD="26" MM="10" YYYY="2022"><SubName>asa</SubName><url>ADR/ADR0004_0101.xml</url></substance><substance ID="THP0004Y0101" DD="26" MM="10" YYYY="2020"><SubName>asd)</SubName><url>THP/THP0004Y0101.xml</url></substance><substance ID="THC0004Y0101" DD="26" MM="10" YYYY="2020"><SubName>asd</SubName><url>THC/THC0004Y0101.xml</url></substance><substance ID="TRP0004Y0101" DD="26" MM="10" YYYY="2020"><SubName>asd</SubName><url>TRP/TRP0004Y0101.xml</url></substance><substance ID="TRC0004Y0101" DD="26" MM="10" YYYY="2020"><SubName>asd</SubName><url>TRC/TRC0004Y0101.xml</url></substance></available_substances></Document>
I want to search for a specific substance id in xml file and then duplicate it and do some manipulation and I am able to implement it. But after duplicating I want to insert that duplicated element right below the substance id from which it was duplicated.
This is my code:
# Use the os.listdir() method to list all files in the specified folder and filter for XML files
for filename in os.listdir(IAC_files_path):if filename.endswith(".xml"):# Remove the ".xml" extension before adding to the listxml_file_names.append(os.path.splitext(filename)[0])# Parse the XML content to search for <substance> elements with matching IDs
tree = ET.ElementTree(ET.fromstring(xml_content_to_search))
root = tree.getroot()# Initialize a flag to check if at least one match is found
match_found = False# Create a list to store duplicated <substance> elements
duplicated_substance_elements = []# Iterate through the <substance> elements and search for matching IDs
for substance_element in root.findall(".//substance"):substance_id = substance_element.get("ID")print(f"Processing substance_id: {substance_id}")# Check if the ID without the extension is in the listbase_substance_id = os.path.splitext(substance_id)[0]if base_substance_id in xml_file_names:# Print the XML file name found in the <substance> element's ID attributeprint(f"Found XML file name '{substance_id}' in the other XML file.")match_found = True# Create a new <substance> element with modified attributes for IUCduplicate_substance_element_iuc = ET.Element("substance")duplicate_substance_element_iuc.set("ID", base_substance_id.replace("IAC", "IUC"))duplicate_substance_element_iuc.set("DD", substance_element.get("DD"))duplicate_substance_element_iuc.set("MM", substance_element.get("MM"))duplicate_substance_element_iuc.set("YYYY", substance_element.get("YYYY"))# Duplicate and modify the <SubName> element for IUCsubname_element = substance_element.find("SubName")duplicate_subname_element_iuc = ET.Element("SubName")duplicate_subname_element_iuc.text = subname_element.text.replace("IAC", "IUC")duplicate_substance_element_iuc.append(duplicate_subname_element_iuc)# Duplicate and modify the <url> element for IUCurl_element = substance_element.find("url")duplicate_url_element_iuc = ET.Element("url")duplicate_url_element_iuc.text = url_element.text.replace("IAC", "IUC")duplicate_substance_element_iuc.append(duplicate_url_element_iuc)# Insert the duplicated IUC <substance> element immediately after the original IAC elementsubstance_element_index = list(root).index(substance_element)root.insert(substance_element_index + 1, duplicate_substance_element_iuc)# Create a new <substance> element with modified attributes for IECduplicate_substance_element_iec = ET.Element("substance")duplicate_substance_element_iec.set("ID", base_substance_id.replace("IAC", "IEC"))duplicate_substance_element_iec.set("DD", substance_element.get("DD"))duplicate_substance_element_iec.set("MM", substance_element.get("MM"))duplicate_substance_element_iec.set("YYYY", substance_element.get("YYYY"))# Duplicate and modify the <SubName> element for IECduplicate_subname_element_iec = ET.Element("SubName")duplicate_subname_element_iec.text = subname_element.text.replace("IAC", "IEC")duplicate_substance_element_iec.append(duplicate_subname_element_iec)# Duplicate and modify the <url> element for IECduplicate_url_element_iec = ET.Element("url")duplicate_url_element_iec.text = url_element.text.replace("IAC", "IEC")duplicate_substance_element_iec.append(duplicate_url_element_iec)# Insert the duplicated IUC <substance> element immediately after the original IAC elementsubstance_element_index = list(root).index(substance_element)root.insert(substance_element_index + 2, duplicate_substance_element_iec)# Append the duplicated IEC <substance> element to the list#duplicated_substance_elements.append(duplicate_substance_element_iec)# Check if no matches were found and print "Not found" message
if not match_found:print("No XML file names were found in the other XML file.")# # Append the duplicated IEC <substance> elements to the end
# for duplicate_element in duplicated_substance_elements:
# root.append(duplicate_element)# Print the modified XML content
modified_xml_content = ET.tostring(root, encoding="unicode")
print(modified_xml_content)
I am getting this error :
<Element 'substance' at 0x000002BF2DFE8720> is not in list
at this line of code
substance_element_index = list(root).index(substance_element)
My desired output is something like this:
<Document ProviderID="TD" DecimalMarker="comma" Website="https://erc-viewer.sap.com/">
<available_substances><substance ID="0004" DD="14" MM="10" YYYY="2010"><SubName>0004</SubName><url>./UN/0004.xml</url><group>ADR0004_0101</group><group>THP0004Y0101</group><group>THC0004Y0101</group><group>TRP0004Y0101</group><group>TRC0004Y0101</group><group>TIP0004Y0101</group><group>TIC0004Y0101</group><group>CTR0004Y0102</group><group>CRP0004Y0102</group><group>CRC0004Y0102</group></substance><substance ID="ADR0004_0101" DD="26" MM="10" YYYY="2022"><SubName>asa</SubName><url>ADR/ADR0004_0101.xml</url></substance><substance ID="THP0004Y0101" DD="26" MM="10" YYYY="2020"><SubName>asd)</SubName><url>THP/THP0004Y0101.xml</url></substance><substance ID="THC0004Y0101" DD="26" MM="10" YYYY="2020"><SubName>asd</SubName><url>THC/THC0004Y0101.xml</url></substance><substance ID="TRP0004Y0101" DD="26" MM="10" YYYY="2020"><SubName>asd</SubName><url>TRP/TRP0004Y0101.xml</url></substance><substance ID="TRC0004Y0101" DD="26" MM="10" YYYY="2020"><SubName>asd</SubName><url>TRC/TRC0004Y0101.xml</url></substance>**<substance ID="IEC0004Y0101" DD="26" MM="10" YYYY="2020"><SubName>asd</SubName><url>TRC/TRC0004Y0101.xml</url></substance>**</available_substances></Document>