Code development platform for open source projects from the European Union institutions

Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • ai4xml/playground
1 result
Show changes
Commits on Source (2)
......@@ -79,10 +79,7 @@ def extract_preface(content):
return None
def extract_preamble_from_text(content, file_path):
import re
print("processing ", file_path)
if "32019R0537" in file_path:
print("just breakpoint!")
# Pattern to capture the preamble components
pattern = r"((?:THE EUROPEAN COMMISSION,|THE COUNCIL OF THE EUROPEAN UNION,|THE EUROPEAN PARLIAMENT AND THE COUNCIL OF THE EUROPEAN UNION,|THE EUROPEAN PARLIAMENT AND THE COUNCIL OF THE EUROPEAN UNION|THE EUROPEAN COMMISSION|THE GOVERNING COUNCIL OF THE EUROPEAN CENTRAL BANK,|THE COUNCIL OF THE EUROPEAN UNION|THE COUNCIL OF THE OPEAN UNION,|THE EUROPEAN PARLIAMENT,|THE EUROPEAN COMMUNITIES,|THE EUROPEAN PARLIAMENT AND THE COUNCIL,|The GOVERNING COUNCIL OF THE EUROPEAN CENTRAL BANK,|THE MANAGEMENT BOARD,).*?)(?=HAS ADOPTED THIS REGULATION:|HAVE ADOPTED THIS REGULATION:|HAS ADOPTED THIS DIRECTIVE:|HAVE ADOPTED THIS DIRECTIVE:|HAS ADOPTED THIS DECISION:|HAVE ADOPTED THIS DECISION:|HAS ADOPTED THE FOLLOWING REGULATION:)"
......@@ -101,16 +98,23 @@ def extract_preamble_from_text(content, file_path):
citations_part, recitals_part = extract_citations_and_recitals(preamble)
citations = extract_citations(citations_part)
if citations and citations[0] == formula:
citations = citations[1:] # Remove the formula from citations
else:
print("formula is not the first citation in the citations part of file: ", file_path)
print("formula: ", formula)
print("citations: ", citations)
recitals = extract_recitals(recitals_part)
# Extract preamble final using a separate function
preamble_final = extract_preamble_final(content)
return {
"formula": formula.strip(),
"citations": citations,
"recitals": recitals,
"preamble_final": preamble_final.strip()
"preamble_final": preamble_final.strip(),
}
else:
print("No preamble found in the content of file: ", file_path)
......@@ -143,7 +147,6 @@ def extract_formula(preamble):
return formula_match.group(1) if formula_match else None
def extract_citations_and_recitals(preamble):
import re
# Split the preamble into citations and recitals using 'Whereas' as the separator
splitters = ['Whereas:', 'WHEREAS:', 'Whereas,', 'Whereas','whereas:']
......@@ -166,22 +169,36 @@ def extract_citations(citations_part):
return []
def extract_recitals(recitals_text):
    r"""Split the recitals part of a preamble into numbered recitals.

    A recital starts at a blank line followed by its number in parentheses,
    either backslash-escaped (``\(12\)``) or plain (``(12)``).  Each recital
    keeps its leading number marker and runs up to the start of the next one.

    The Markdown converter sometimes emits wrong recital numbers (see
    document 32017R1270).  When a number was already seen, the recital is
    stored under ``max(existing numbers) + 1`` and only its leading marker is
    rewritten to that number; the affected numbers are reported on stdout.

    Args:
        recitals_text: Text of the recitals part of the preamble.

    Returns:
        dict mapping recital number (int) to recital text (str).
    """
    import re

    # Number marker, escaped or plain; exactly one of the two groups matches.
    marker = r'(?:\\\((\d+)\\\)|\((\d+)\))'
    boundary = re.compile(r'\n\n' + marker)
    leading_marker = re.compile(marker)

    recitals = {}
    duplicates = []

    matches = list(boundary.finditer(recitals_text))
    for index, match in enumerate(matches):
        start = match.start()
        end = matches[index + 1].start() if index + 1 < len(matches) else len(recitals_text)
        recital_number = int(match.group(1) or match.group(2))
        recital_text = recitals_text[start:end].strip()

        if recital_number not in recitals:
            recitals[recital_number] = recital_text
            continue

        # Duplicate number: renumber this recital instead of overwriting the
        # one already stored.
        duplicates.append(recital_number)
        if leading_marker.match(recital_text):
            corrected_number = max(recitals.keys()) + 1
            # count=1: rewrite only the leading marker, never parenthesised
            # numbers inside the recital body such as "column (1)".
            recital_text = leading_marker.sub(f'({corrected_number})', recital_text, count=1)
            recitals[corrected_number] = recital_text
        else:
            print(f"Recital with no number found: {recital_text}")

    if duplicates:
        print(f"Recitals with duplicate numbers: {', '.join(map(str, duplicates))}")

    return recitals
......@@ -211,11 +228,16 @@ def extract_preamble_from_xml(xml_content):
num = num_element.text if num_element is not None else ""
text = " ".join(p.text for p in recital.findall(".//{*}p") if p.text)
recital_texts.append(f"{num} {text}")
# Extract preamble_final
preamble_final = preamble.find(".//{*}block[@name='preamble.final']")
preamble_final_text = preamble_final.text if preamble_final is not None else ""
return {
"formula": formula_text,
"citations": citation_texts,
"recitals": recital_texts
"recitals": recital_texts,
"preamble_final": preamble_final_text
}
from tqdm import tqdm
......@@ -254,10 +276,11 @@ def analyse_preamble_dataset_from_xml(xml_files, output_file):
last_tag_content[xml_file] = extract_last_tag_or_content(preamble_data['recitals'], xml_content)
preamble_dataset.append({
'document': xml_file,
'celex_id': os.path.splitext(os.path.basename(xml_file))[0],
'formula': preamble_data['formula'],
'citations': preamble_data['citations'],
'recitals': preamble_data['recitals']
'recitals': preamble_data['recitals'],
'preamble_final': preamble_data['preamble_final']
})
# Count unique tags in the preamble content
......@@ -334,27 +357,54 @@ def extract_last_tag_or_content(recitals, xml_content):
return "No content found between the last recital and the preamble end tag."
def create_preamble_dataset_from_md(content_files, output_file):
    """Build a JSON preamble dataset from text (Markdown) documents.

    Each file is read as UTF-8 and passed to ``extract_preamble_from_text``.
    Files whose content starts with an image reference (``![](``) are
    skipped, as are files for which no preamble data is returned.

    Args:
        content_files: Iterable of paths to the documents to process.
        output_file: Path of the JSON file to write.

    The output is a JSON list with one object per document, keyed by
    ``celex_id`` (the file name without extension), ``formula``,
    ``citations``, ``recitals`` and ``preamble_final``.
    """
    preamble_dataset = []

    for file_path in content_files:
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()

        # Check if the content starts with an image reference
        if content.strip().startswith("![]("):
            print(f"Ignoring file {file_path} as it starts with an image reference.")
            continue

        preamble_data = extract_preamble_from_text(content, file_path)
        if not preamble_data:
            continue

        # The CELEX id is the file name without its extension.
        celex_id = os.path.splitext(os.path.basename(file_path))[0]
        preamble_dataset.append({
            'celex_id': celex_id,
            'formula': preamble_data['formula'],
            'citations': preamble_data['citations'],
            'recitals': preamble_data['recitals'],
            'preamble_final': preamble_data['preamble_final'],
        })

    with open(output_file, 'w', encoding='utf-8') as jsonfile:
        json.dump(preamble_dataset, jsonfile, indent=2)
def create_preamble_dataset_from_xml(xml_directory, output_file):
    """Build a JSON preamble dataset from the ``.xml`` files of a directory.

    Each XML file is read as UTF-8 and passed to
    ``extract_preamble_from_xml``; files for which no preamble data is
    returned are skipped.

    Args:
        xml_directory: Directory scanned (non-recursively) for ``.xml`` files.
        output_file: Path of the JSON file to write.

    The output is a JSON list with one object per document, keyed by
    ``celex_id`` (the file name without extension), ``formula``,
    ``citations``, ``recitals`` and ``preamble_final``.
    """
    preamble_dataset = []

    # Sorted so that the dataset order does not depend on the arbitrary
    # order in which os.listdir returns entries.
    for filename in sorted(os.listdir(xml_directory)):
        if not filename.endswith('.xml'):
            continue

        file_path = os.path.join(xml_directory, filename)
        celex_id = os.path.splitext(filename)[0]  # Remove .xml extension

        with open(file_path, 'r', encoding='utf-8') as file:
            xml_content = file.read()

        preamble_data = extract_preamble_from_xml(xml_content)
        if not preamble_data:
            continue

        preamble_dataset.append({
            'celex_id': celex_id,
            'formula': preamble_data['formula'],
            'citations': preamble_data['citations'],
            'recitals': preamble_data['recitals'],
            # Same key as the Markdown-based dataset; defaults to "" when
            # the extractor did not provide it.
            'preamble_final': preamble_data.get('preamble_final', ''),
        })

    with open(output_file, 'w', encoding='utf-8') as jsonfile:
        json.dump(preamble_dataset, jsonfile, indent=2)
def remove_namespaces(xml_element):
......@@ -682,4 +732,244 @@ def validate_akn(xml_content_or_path, schema, is_content=True):
xml_doc = etree.fromstring(xml_content_or_path)
else:
xml_doc = etree.parse(xml_content_or_path)
return schema.validate(xml_doc), schema.error_log
\ No newline at end of file
return schema.validate(xml_doc), schema.error_log
# ------- Similarity functions
import string
from fuzzywuzzy import fuzz
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from difflib import SequenceMatcher
def calculate_levenshtein_score(text1, text2):
    """Return ``fuzz.ratio`` of the two texts divided by 100."""
    ratio_value = fuzz.ratio(text1, text2)
    return ratio_value / 100
def calculate_cosine_similarity(text1, text2):
    """Return the cosine similarity of the TF-IDF vectors of the two texts.

    The vectorizer is fitted on exactly these two texts.
    """
    tfidf_matrix = TfidfVectorizer().fit_transform([text1, text2])
    first_row, second_row = tfidf_matrix[0], tfidf_matrix[1]
    similarity_matrix = cosine_similarity(first_row, second_row)
    return similarity_matrix[0][0]
def calculate_sequence_matcher_score(text1, text2):
    """Return the ``difflib.SequenceMatcher`` ratio of the two texts.

    Punctuation and whitespace characters are passed to the matcher as junk.
    """
    def is_junk(char):
        return char in string.punctuation + string.whitespace

    matcher = SequenceMatcher(is_junk, text1, text2)
    return matcher.ratio()
def jaccard_similarity(s1, s2):
    """Return the Jaccard similarity of the lower-cased word sets of two strings.

    The strings are lower-cased and split on whitespace; the score is the
    size of the intersection of the two word sets divided by the size of
    their union.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        float in [0, 1].  When neither string contains a word the union is
        empty; the two (identical) empty sets are scored 1.0 instead of
        raising ZeroDivisionError.
    """
    set1 = set(s1.lower().split())
    set2 = set(s2.lower().split())
    union = set1 | set2
    if not union:
        return 1.0
    return len(set1 & set2) / len(union)
# ---------------
def obsfucate(var):
    """Mask a secret string for display.

    The first four characters are kept and every following character is
    replaced by ``*``.  Values of four characters or fewer are masked
    entirely, so that a short value is never printed in clear.

    Args:
        var: The string to mask.

    Returns:
        A string of the same length as ``var``.
    """
    visible_chars = 4
    if len(var) <= visible_chars:
        return '*' * len(var)
    return f"{var[:visible_chars]}{'*' * (len(var) - visible_chars)}"
def check_env_vars():
    """Print, for each expected credential variable, whether it is set.

    Values that are set are printed through ``obsfucate``; unset or empty
    variables are reported as not set.
    """
    expected_names = (
        'OPENAI_API_KEY',
        'AWS_ACCESS_KEY_ID',
        'AWS_SECRET_ACCESS_KEY',
        'AWS_SESSION_TOKEN',
        'AWS_REGION_NAME',
    )
    for name in expected_names:
        current = os.getenv(name)
        if not current:
            print(f"{name} is not set.")
            continue
        print(f"{name} is set. Value: {obsfucate(current)}")
import dspy
def get_llms(cache=True):
    """Return a dict of ``dspy.LM`` clients keyed by Bedrock model id.

    Every client is created with ``model="bedrock/<model id>"`` and the given
    ``cache`` flag.
    """
    # Currently disabled models, kept for reference:
    #   bedrock/us.meta.llama3-2-11b-instruct-v1:0
    #   bedrock/meta.llama3-1-70b-instruct-v1:0
    #   bedrock/meta.llama3-1-8b-instruct-v1:0
    #   openai/gpt-4o-mini (key "gpt4o-mini")
    bedrock_model_ids = (
        "us.meta.llama3-2-1b-instruct-v1:0",
        "us.meta.llama3-2-3b-instruct-v1:0",
        "mistral.mistral-7b-instruct-v0:2",
        "mistral.mixtral-8x7b-instruct-v0:1",
        "mistral.mistral-large-2402-v1:0",
        "mistral.mistral-small-2402-v1:0",
    )
    return {
        model_id: dspy.LM(model=f"bedrock/{model_id}", cache=cache)
        for model_id in bedrock_model_ids
    }
# --------------- dataset
import pandas as pd
from dspy.datasets.dataset import Dataset
class AKNDataset(Dataset):
    """Dataset whose train/dev splits are read from a JSON file.

    Only records whose ``similarity`` value is strictly greater than
    ``similarity_threshold`` are kept.  The first ``train_size`` kept records
    form the train split and the next ``dev_size`` records the dev split.
    """

    def __init__(self, file_path, similarity_threshold=0.95, train_size=700, dev_size=100, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

        records = pd.read_json(file_path)
        is_similar_enough = records['similarity'] > similarity_threshold
        kept = records[is_similar_enough]

        dev_end = train_size + dev_size
        self._train = kept.iloc[:train_size].to_dict(orient='records')
        self._dev = kept.iloc[train_size:dev_end].to_dict(orient='records')
# --------------- metrics
from rouge_score import rouge_scorer
import xml.etree.ElementTree as ET
import lxml.etree as etree
import os
def insert_xml_into_akn_empty(xml_content):
    """Insert an XML fragment into the empty AKN template, after ``<meta>``.

    The template ``data/akn_files/akn_empty.xml`` is located relative to the
    directory of this module.

    Returns:
        ``(akn_xml, None)`` with the resulting document as a string on
        success, or ``(None, error_message)`` when the meta element is
        missing or any exception is raised.
    """
    try:
        # The template path is resolved from this file's location.
        module_dir = os.path.dirname(os.path.abspath(__file__))
        template_path = os.path.join(module_dir, 'data/akn_files/akn_empty.xml')

        # Load the template first, then parse the fragment to insert.
        root = etree.parse(template_path).getroot()
        fragment = etree.fromstring(xml_content)

        meta_element = root.find('.//{http://docs.oasis-open.org/legaldocml/ns/akn/3.0}meta')
        if meta_element is None:
            print("Meta element not found in the empty AKN file.")
            return None, "Meta element not found in the empty AKN file."

        # The fragment becomes the sibling that follows the meta element.
        meta_element.addnext(fragment)
        return etree.tostring(root, encoding='unicode'), None
    except Exception as e:
        return None, str(e)
import logging
# Configure logging
def configure_program_compiling_logging(model_name):
    """Configure root logging to ``<model_name>_program_compiling.log``.

    Logging is set to INFO level with a timestamp/level/message format.

    Returns:
        The root logger.
    """
    logging.basicConfig(
        filename=f'{model_name}_program_compiling.log',
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
    )
    root_logger = logging.getLogger()
    return root_logger
def _score_generated_xml(reference, prediction, reference_text=None):
    """Validate a predicted preamble XML and compare it with the reference.

    Shared implementation of ``validation_metric`` and
    ``validation_metric_p2``.

    The predicted XML is inserted into the empty AKN template and validated
    against the ``akomantoso30.xsd`` schema located relative to this module.
    If that succeeds, the ROUGE-L F1 between the reference XML and the
    predicted XML is computed.

    Args:
        reference: Mapping providing ``celex_id`` and ``xml``.
        prediction: Mapping providing ``xml``.
        reference_text: Optional reference text, only written to the log
            when the score is low.

    Returns:
        False when the XML cannot be inserted or is not schema-valid;
        otherwise whether the ROUGE-L F1 is at least 0.96.
    """
    logging.info("reference: %s", reference['celex_id'])
    reference_xml = reference['xml']
    generated_xml = prediction['xml']

    # Insert the generated XML into the empty AKN file
    akn_xml, error = insert_xml_into_akn_empty(generated_xml)
    if akn_xml is None:
        logging.error(error)
        return False

    # Validate XML structure using validate_akn function
    current_dir = os.path.dirname(os.path.abspath(__file__))
    schema_file_path = os.path.join(current_dir, 'data/akn_files/schema/akomantoso30.xsd')
    schema = etree.XMLSchema(file=schema_file_path)
    is_valid, error_log = validate_akn(akn_xml, schema)
    if not is_valid:
        logging.error("XML is not valid according to AKN schema")
        logging.error(error_log)
        return False  # Return false if XML is not valid

    # Only ROUGE-L is used for the decision, so only ROUGE-L is computed.
    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
    rougeL_f1 = scorer.score(reference_xml, generated_xml)['rougeL'].fmeasure
    logging.info("rougeL_f1: %s", rougeL_f1)

    # Log the full material for clearly low scores to ease inspection.
    if rougeL_f1 <= 0.9:
        logging.warning("Low ROUGE-L F1 Score Detected: %s", rougeL_f1)
        if reference_text is not None:
            logging.warning("-" * 10)
            logging.warning("Reference Text:\n%s", reference_text)
        logging.warning("-" * 10)
        logging.warning("Reference XML:\n%s", reference_xml)
        logging.warning("-" * 10)
        logging.warning("Predicted XML:\n%s", generated_xml)
        logging.warning("-" * 100)

    # Setting a threshold for ROUGE-L
    return rougeL_f1 >= 0.96  # Threshold can be adjusted as needed


def validation_metric(reference, prediction, trace=None):
    """Metric for examples that carry the source text (``reference['text']``).

    Returns False when the predicted XML cannot be inserted into the AKN
    template or is not schema-valid; otherwise True when the ROUGE-L F1
    against ``reference['xml']`` is at least 0.96.  ``trace`` is accepted
    but not used.
    """
    return _score_generated_xml(reference, prediction, reference_text=reference['text'])


def validation_metric_p2(reference, prediction, trace=None):
    """Same metric as ``validation_metric`` without reading ``reference['text']``.

    ``trace`` is accepted but not used.
    """
    return _score_generated_xml(reference, prediction)
# ---------------
def setup_phoenix():
    """Launch the Phoenix app and instrument DSPy and LiteLLM.

    Traces are registered against ``http://127.0.0.1:6006/v1/traces``.
    """
    import phoenix as px

    px.launch_app()

    from openinference.instrumentation.dspy import DSPyInstrumentor
    from openinference.instrumentation.litellm import LiteLLMInstrumentor
    from phoenix.otel import register

    register(endpoint="http://127.0.0.1:6006/v1/traces")

    # Instrument DSPy first, then LiteLLM.
    for instrumentor_class in (DSPyInstrumentor, LiteLLMInstrumentor):
        instrumentor_class().instrument(skip_dep_check=True)
\ No newline at end of file
source diff could not be displayed: it is too large. Options to address this: view the blob.
source diff could not be displayed: it is too large. Options to address this: view the blob.
%% Cell type:markdown id: tags:
# Program P1: transform entire preamble from text to xml
%% Cell type:markdown id: tags:
Note: this program is discontinued due to context window limitations.
Its purpose is to assess whether splitting the preamble into smaller chunks of text and transforming each chunk into XML is more efficient.
%% Cell type:code id: tags:
``` python
%load_ext autoreload
%autoreload 2
```
%% Cell type:code id: tags:
``` python
!pip install -q dspy-ai python-dotenv rouge-score
```
%% Output
[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: pip install --upgrade pip
%% Cell type:code id: tags:
``` python
!pip install -q git+https://github.com/stanfordnlp/dspy.git
```
%% Output
[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: pip install --upgrade pip
%% Cell type:code id: tags:
``` python
from importlib.metadata import version
print(f"dspy-ai version: {version('dspy-ai')}")
```
%% Output
dspy-ai version: 2.5.8
%% Cell type:code id: tags:
``` python
from dotenv import load_dotenv
import os
import json
import re
import sys
```
%% Cell type:code id: tags:
``` python
# Get the current working directory
current_dir = os.getcwd()
# Append the parent directory to sys.path
parent_dir = f"{os.path.dirname(current_dir)}/.."
sys.path.append(parent_dir)
# Now you can import from functions
from functions import *
```
%% Cell type:code id: tags:
``` python
load_dotenv()
```
%% Output
True
%% Cell type:markdown id: tags:
## Step 1: Setup
%% Cell type:code id: tags:
``` python
import dspy
llms = get_llms()
```
%% Cell type:markdown id: tags:
## Step 2: Define Signatures
%% Cell type:code id: tags:
``` python
class PreambleSignature(dspy.Signature):
    """Create an XML representation of a document preamble section in the Akoma Ntoso (AKN) format. It is composed of formula, citations and recitals."""
    # NOTE(review): the docstring above and the `desc` strings below are
    # presumably consumed by dspy when building the prompt — treat them as
    # runtime text, not as plain documentation; confirm against the dspy docs.
    # Input field: the preamble as raw text.
    text = dspy.InputField(desc="Raw text format of the document preamble section")
    # Output field: the AKN XML produced for that preamble.
    xml = dspy.OutputField(desc="Akoma Ntoso (AKN) XML representation of the input preamble")
```
%% Cell type:markdown id: tags:
## Step 3: Building the Transformation Pipeline
%% Cell type:code id: tags:
``` python
class PreambleTransformationPipeline(dspy.Module):
    """Transform the raw text of a preamble into AKN XML in a single step.

    The text is passed to a ``dspy.ChainOfThought`` predictor built on
    ``PreambleSignature``; the ``<preamble>`` element is then extracted from
    the generated output.
    """

    def __init__(self):
        super().__init__()
        self.transform = dspy.ChainOfThought(PreambleSignature)

    def extract_xml(self, content):
        """Return the last ``<preamble>...</preamble>`` element found in ``content``.

        Returns an empty string when no such element is present.
        """
        # Non-greedy match from each <preamble> tag up to the next
        # </preamble>, newlines included (re.DOTALL).
        matches = re.findall(r'<preamble>.*?</preamble>', content, re.DOTALL)
        if matches:
            return matches[-1]  # Return the last match
        return ""  # Return empty string if no XML part is found

    def forward(self, text):
        """Generate the preamble XML for ``text``.

        Returns:
            dict with a single ``"xml"`` key.  The value is an empty string
            when ``text`` is empty or when no ``<preamble>`` element is
            found in the generated output.
        """
        # Nothing to transform: return the same shape as the normal path so
        # that callers reading result["xml"] keep working.
        if not text:
            return {"xml": ""}

        # Generate XML for the preamble
        xml_preamble = self.transform(text=text)

        # Extract the desired XML part
        extracted_xml = self.extract_xml(xml_preamble.xml)

        # TODO: assert number of citations/recitals/formulas.
        # TODO: assert text similarity between the preamble text and the preamble text from the xml.

        # Return the extracted XML
        return {"xml": extracted_xml}
```
%% Cell type:markdown id: tags:
## Step 4: Executing the Pipeline (0-shot conversion without optimization)
%% Cell type:code id: tags:
``` python
preamble_pipeline = PreambleTransformationPipeline()
```
%% Cell type:markdown id: tags:
## Step 5: Dataset preparation
%% Cell type:code id: tags:
``` python
# Location of the P1 dataset, relative to the notebook's working directory.
output_dir = '../../data/preambles/p1'
dataset_file = os.path.join(output_dir, 'dataset.json')
# Load the dataset with 'text' as the only input key; train_size and dev_size
# are passed as 50 and 30.
dataset = AKNDataset(dataset_file, similarity_threshold=0.95, train_size=50, dev_size=30, input_keys=['text'])
# Show the first three examples of each split.
print(dataset.train[:3])
print(dataset.dev[:3])
```
%% Output
[Example({'celex_id': '32020R0693', 'text': 'THE EUROPEAN COMMISSION,\n\nHaving regard to the Treaty on the Functioning of the European Union,\n\nHaving regard to Regulation (EU) No 952/2013 of the European Parliament\nand of the Council of 9 October 2013 laying down the Union Customs\nCode\xa0[number="1",footnote="OJ L\xa0269, 10.10.2013, p.\xa01."], and in particular Article\xa057(4) and Article 58(2) thereof,\n\nWhereas:\n\n\\(1\\) In order to ensure uniform application of the Combined\nNomenclature annexed to Council Regulation (EEC) No 2658/87\xa0[number="2",footnote="Council Regulation (EEC) No 2658/87 of 23 July 1987 on the tariff\n and statistical nomenclature and on the Common Customs Tariff (OJ\n L\xa0256, 7.9.1987, p.\xa01)."], it is\nnecessary to adopt measures concerning the classification of the goods\nreferred to in the Annex to this Regulation.\n\n\\(2\\) Regulation (EEC) No 2658/87 has laid down the general rules for\nthe interpretation of the Combined Nomenclature. Those rules apply also\nto any other nomenclature which is wholly or partly based on it or which\nadds any additional subdivision to it and which is established by\nspecific provisions of the Union, with a view to the application of\ntariff and other measures relating to trade in goods.\n\n\\(3\\) Pursuant to those general rules, the goods described in column (1)\nof the table set out in the Annex should be classified under the CN code\nindicated in column (2), by virtue of the reasons set out in column (3)\nof that table.\n\n\\(4\\) It is appropriate to provide that binding tariff information\nissued in respect of the goods concerned by this Regulation which does\nnot conform to this Regulation may, for a certain period, continue to be\ninvoked by the holder in accordance with Article\xa034(9) of Regulation\n(EU) No 952/2013. 
That period should be set at three months.\n\n\\(5\\) The measures provided for in this Regulation are in accordance\nwith the opinion of the Customs Code Committee,\n\nHAS ADOPTED THIS REGULATION:', 'preamble_text_from_xml': 'THE EUROPEAN COMMISSION, Having regard to the Treaty on the Functioning of the European Union, Having regard to Regulation (EU) No 952/2013 of the European Parliament and of the Council of 9 October 2013 laying down the Union Customs Code, and in particular Article 57(4) and Article 58(2) thereof, Whereas: (1) In order to ensure uniform application of the Combined Nomenclature annexed to Council Regulation (EEC) No 2658/87, it is necessary to adopt measures concerning the classification of the goods referred to in the Annex to this Regulation. (2) Regulation (EEC) No 2658/87 has laid down the general rules for the interpretation of the Combined Nomenclature. Those rules apply also to any other nomenclature which is wholly or partly based on it or which adds any additional subdivision to it and which is established by specific provisions of the Union, with a view to the application of tariff and other measures relating to trade in goods. (3) Pursuant to those general rules, the goods described in column (1) of the table set out in the Annex should be classified under the CN code indicated in column (2), by virtue of the reasons set out in column (3) of that table. (4) It is appropriate to provide that binding tariff information issued in respect of the goods concerned by this Regulation which does not conform to this Regulation may, for a certain period, continue to be invoked by the holder in accordance with Article 34(9) of Regulation (EU) No 952/2013. That period should be set at three months. 
(5) The measures provided for in this Regulation are in accordance with the opinion of the Customs Code Committee, HAS ADOPTED THIS REGULATION:', 'xml': '<preamble>\n <formula name="preambleFormula">\n <p>THE EUROPEAN COMMISSION,</p>\n </formula>\n <citations eId="cits_1">\n <citation eId="cits_1__cit_1">\n <p>Having regard to the Treaty on the Functioning of the European Union,</p>\n </citation>\n <citation eId="cits_1__cit_2">\n <p>Having regard to Regulation (EU) No 952/2013 of the European Parliament and of the Council of<date date="2013-10-09">9 October 2013</date>laying down the Union Customs Code<authorialNote class="FOOTNOTE" placement="bottom" GUID="E0001" NUMBERING_CONTINUED="YES"><p><ref eId="ref_1" href="/akn/eu/documentCollection/L/gu/2013-10-10/269/!main#eop_1">OJ L 269, 10.10.2013, p. 1</ref>.</p></authorialNote>, and in particular Article 57(4) and Article 58(2) thereof,</p>\n </citation>\n </citations>\n <recitals eId="recs_1">\n <intro eId="recs_1__intro">\n <p>Whereas:</p>\n </intro>\n <recital eId="recs_1__rec_1">\n <num>(1)</num>\n <p>In order to ensure uniform application of the Combined Nomenclature annexed to Council Regulation (EEC) No 2658/87<authorialNote class="FOOTNOTE" placement="bottom" GUID="E0002" NUMBERING_CONTINUED="YES"><p>Council Regulation (EEC) No 2658/87 of<date date="1987-07-23">23 July 1987</date>on the tariff and statistical nomenclature and on the Common Customs Tariff (<ref eId="ref_2" href="/akn/eu/documentCollection/L/gu/1987-09-07/256/!main#eop_1">OJ L 256, 7.9.1987, p. 1</ref>).</p></authorialNote>, it is necessary to adopt measures concerning the classification of the goods referred to in the Annex to this Regulation.</p>\n </recital>\n <recital eId="recs_1__rec_2">\n <num>(2)</num>\n <p>Regulation (EEC) No 2658/87 has laid down the general rules for the interpretation of the Combined Nomenclature. 
Those rules apply also to any other nomenclature which is wholly or partly based on it or which adds any additional subdivision to it and which is established by specific provisions of the Union, with a view to the application of tariff and other measures relating to trade in goods.</p>\n </recital>\n <recital eId="recs_1__rec_3">\n <num>(3)</num>\n <p>Pursuant to those general rules, the goods described in column (1) of the table set out in the Annex should be classified under the CN code indicated in column (2), by virtue of the reasons set out in column (3) of that table.</p>\n </recital>\n <recital eId="recs_1__rec_4">\n <num>(4)</num>\n <p>It is appropriate to provide that binding tariff information issued in respect of the goods concerned by this Regulation which does not conform to this Regulation may, for a certain period, continue to be invoked by the holder in accordance with Article 34(9) of Regulation (EU) No 952/2013. That period should be set at three months.</p>\n </recital>\n <recital eId="recs_1__rec_5">\n <num>(5)</num>\n <p>The measures provided for in this Regulation are in accordance with the opinion of the Customs Code Committee,</p>\n </recital>\n </recitals>\n <block name="preamble.final">HAS ADOPTED THIS REGULATION:</block>\n </preamble>\n ', 'similarity': 0.9905888031000001}) (input_keys={'text'}), Example({'celex_id': '32020R1592', 'text': 'THE EUROPEAN COMMISSION,\n\nHaving regard to the Treaty on the Functioning of the European Union,\n\nHaving regard to Regulation (EU) No 1308/2013 of the European Parliament\nand of the Council of 17 December 2013 establishing a common\norganisation of the markets in agricultural products and repealing\nCouncil Regulations (EEC) No 922/72, (EEC) No 234/79, (EC) No 1037/2001\nand (EC) No 1234/2007l\xa0[number="1",footnote="OJ L\xa0347, 20.12.2013, p.\xa0671."], and in particular Article 183(b) thereof,\n\nHaving regard to Regulation (EU) No 510/2014 of the European Parliament\nand of the Council of 16 
April 2014 laying down the trade arrangements\napplicable to certain goods resulting from the processing of\nagricultural products and repealing Council Regulations (EC) No\n1216/2009 and (EC) No 614/2009\xa0[number="2",footnote="OJ L\xa0150, 20.5.2014, p.\xa01."], and in particular Article 5(6)(a)\nthereof,\n\nWhereas:\n\n\\(1\\) Commission Regulation (EC) No 1484/95\xa0[number="3",footnote="Commission Regulation (EC) No 1484/95 of 28 June 1995 laying down\n detailed rules for implementing the system of additional import\n duties and fixing representative prices in the poultrymeat and egg\n sectors and for egg albumin, and repealing Regulation No 163/67/EEC\n (OJ L\xa0145, 29.6.1995, p.\xa047)."] lays down detailed\nrules for implementing the system of additional import duties and fixes\nrepresentative prices in the poultrymeat and egg sectors and for egg\nalbumin.\n\n\\(2\\) Regular monitoring of the data used to determine representative\nprices for poultrymeat and egg products and for egg albumin shows that\nthe representative import prices for certain products should be amended\nto take account of variations in price according to origin.\n\n\\(3\\) Regulation (EC) No 1484/95 should therefore be amended\naccordingly.\n\n\\(4\\) Given the need to ensure that this measure applies as soon as\npossible after the updated data have been made available, this\nRegulation should enter into force on the day of its publication,\n\nHAS ADOPTED THIS REGULATION:', 'preamble_text_from_xml': 'THE EUROPEAN COMMISSION, Having regard to the Treaty on the Functioning of the European Union, Having regard to Regulation (EU) No 1308/2013 of the European Parliament and of the Council of 17 December 2013 establishing a common organisation of the markets in agricultural products and repealing Council Regulations (EEC) No 922/72, (EEC) No 234/79, (EC) No 1037/2001 and (EC) No 1234/2007l, and in particular Article 183(b) thereof, Having regard to Regulation (EU) No 510/2014 of the European 
Parliament and of the Council of 16 April 2014 laying down the trade arrangements applicable to certain goods resulting from the processing of agricultural products and repealing Council Regulations (EC) No 1216/2009 and (EC) No 614/2009, and in particular Article 5(6)(a) thereof, Whereas: (1) Commission Regulation (EC) No 1484/95 lays down detailed rules for implementing the system of additional import duties and fixes representative prices in the poultrymeat and egg sectors and for egg albumin. (2) Regular monitoring of the data used to determine representative prices for poultrymeat and egg products and for egg albumin shows that the representative import prices for certain products should be amended to take account of variations in price according to origin. (3) Regulation (EC) No 1484/95 should therefore be amended accordingly. (4) Given the need to ensure that this measure applies as soon as possible after the updated data have been made available, this Regulation should enter into force on the day of its publication, HAS ADOPTED THIS REGULATION:', 'xml': '<preamble>\n <formula name="preambleFormula">\n <p>THE EUROPEAN COMMISSION,</p>\n </formula>\n <citations eId="cits_1">\n <citation eId="cits_1__cit_1">\n <p>Having regard to the Treaty on the Functioning of the European Union,</p>\n </citation>\n <citation eId="cits_1__cit_2">\n <p>Having regard to Regulation (EU) No 1308/2013 of the European Parliament and of the Council of<date date="2013-12-17">17 December 2013</date>establishing a common organisation of the markets in agricultural products and repealing Council Regulations (EEC) No 922/72, (EEC) No 234/79, (EC) No 1037/2001 and (EC) No 1234/2007l<authorialNote class="FOOTNOTE" placement="bottom" GUID="E0001" NUMBERING_CONTINUED="YES"><p><ref eId="ref_1" href="/akn/eu/documentCollection/L/gu/2013-12-20/347/!main#eop_671">OJ L 347, 20.12.2013, p. 
671</ref>.</p></authorialNote>, and in particular Article 183(b) thereof,</p>\n </citation>\n <citation eId="cits_1__cit_3">\n <p>Having regard to Regulation (EU) No 510/2014 of the European Parliament and of the Council of<date date="2014-04-16">16 April 2014</date>laying down the trade arrangements applicable to certain goods resulting from the processing of agricultural products and repealing Council Regulations (EC) No 1216/2009 and (EC) No 614/2009<authorialNote class="FOOTNOTE" placement="bottom" GUID="E0002" NUMBERING_CONTINUED="YES"><p><ref eId="ref_2" href="/akn/eu/documentCollection/L/gu/2014-05-20/150/!main#eop_1">OJ L 150, 20.5.2014, p. 1</ref>.</p></authorialNote>, and in particular Article 5(6)(a) thereof,</p>\n </citation>\n </citations>\n <recitals eId="recs_1">\n <intro eId="recs_1__intro">\n <p>Whereas:</p>\n </intro>\n <recital eId="recs_1__rec_1">\n <num>(1)</num>\n <p>Commission Regulation (EC) No 1484/95<authorialNote class="FOOTNOTE" placement="bottom" GUID="E0003" NUMBERING_CONTINUED="YES"><p>Commission Regulation (EC) No 1484/95 of<date date="1995-06-28">28 June 1995</date>laying down detailed rules for implementing the system of additional import duties and fixing representative prices in the poultrymeat and egg sectors and for egg albumin, and repealing Regulation No 163/67/EEC (<ref eId="ref_3" href="/akn/eu/documentCollection/L/gu/1995-06-29/145/!main#eop_47">OJ L 145, 29.6.1995, p. 
47</ref>).</p></authorialNote>lays down detailed rules for implementing the system of additional import duties and fixes representative prices in the poultrymeat and egg sectors and for egg albumin.</p>\n </recital>\n <recital eId="recs_1__rec_2">\n <num>(2)</num>\n <p>Regular monitoring of the data used to determine representative prices for poultrymeat and egg products and for egg albumin shows that the representative import prices for certain products should be amended to take account of variations in price according to origin.</p>\n </recital>\n <recital eId="recs_1__rec_3">\n <num>(3)</num>\n <p>Regulation (EC) No 1484/95 should therefore be amended accordingly.</p>\n </recital>\n <recital eId="recs_1__rec_4">\n <num>(4)</num>\n <p>Given the need to ensure that this measure applies as soon as possible after the updated data have been made available, this Regulation should enter into force on the day of its publication,</p>\n </recital>\n </recitals>\n <block name="preamble.final">HAS ADOPTED THIS REGULATION:</block>\n </preamble>\n ', 'similarity': 0.9838455596000001}) (input_keys={'text'}), Example({'celex_id': '32019R1786', 'text': 'THE EUROPEAN COMMISSION,\n\nHaving regard to the Treaty on the Functioning of the European Union,\n\nHaving regard to Regulation (EU) No 1308/2013 of the European Parliament\nand of the Council of 17 December 2013 establishing a common\norganisation of the markets in agricultural products and repealing\nCouncil Regulations (EEC) No 922/72, (EEC) No 234/79, (EC) No 1037/2001\nand (EC) No 1234/2007\xa0[number="1",footnote="OJ L\xa0347, 20.12.2013, p.\xa0671."], and in particular Article 183(b) thereof,\n\nHaving regard to Regulation (EU) No 510/2014 of the European Parliament\nand of the Council of 16 April 2014 laying down the trade arrangements\napplicable to certain goods resulting from the processing of\nagricultural products and repealing Council Regulations (EC) No\n1216/2009 and (EC) No 614/2009\xa0[number="2",footnote="OJ 
L\xa0150, 20.5.2014, p.\xa01."], and in particular Article 5(6)(a)\nthereof,\n\nWhereas:\n\n\\(1\\) Commission Regulation (EC) No 1484/95\xa0[number="3",footnote="Commission Regulation (EC) No 1484/95 of 28 June 1995 laying down\n detailed rules for implementing the system of additional import\n duties and fixing representative prices in the poultrymeat and egg\n sectors and for egg albumin, and repealing Regulation No 163/67/EEC\n **(**OJ L\xa0145, 29.6.1995, p.\xa047)."] lays down detailed\nrules for implementing the system of additional import duties and fixes\nrepresentative prices in the poultrymeat and egg sectors and for egg\nalbumin.\n\n\\(2\\) Regular monitoring of the data used to determine representative\nprices for poultrymeat and egg products and for egg albumin shows that\nthe representative import prices for certain products should be amended\nto take account of variations in price according to origin.\n\n\\(3\\) Regulation (EC) No 1484/95 should therefore be amended\naccordingly.\n\n\\(4\\) Given the need to ensure that this measure applies as soon as\npossible after the updated data have been made available, this\nRegulation should enter into force on the day of its publication,\n\nHAS ADOPTED THIS REGULATION:', 'preamble_text_from_xml': 'THE EUROPEAN COMMISSION, Having regard to the Treaty on the Functioning of the European Union, Having regard to Regulation (EU) No 1308/2013 of the European Parliament and of the Council of 17 December 2013 establishing a common organisation of the markets in agricultural products and repealing Council Regulations (EEC) No 922/72, (EEC) No 234/79, (EC) No 1037/2001 and (EC) No 1234/2007, and in particular Article 183(b) thereof, Having regard to Regulation (EU) No 510/2014 of the European Parliament and of the Council of 16 April 2014 laying down the trade arrangements applicable to certain goods resulting from the processing of agricultural products and repealing Council Regulations (EC) No 1216/2009 and (EC) No 
614/2009, and in particular Article 5(6)(a) thereof, Whereas: (1) Commission Regulation (EC) No 1484/95 lays down detailed rules for implementing the system of additional import duties and fixes representative prices in the poultrymeat and egg sectors and for egg albumin. (2) Regular monitoring of the data used to determine representative prices for poultrymeat and egg products and for egg albumin shows that the representative import prices for certain products should be amended to take account of variations in price according to origin. (3) Regulation (EC) No 1484/95 should therefore be amended accordingly. (4) Given the need to ensure that this measure applies as soon as possible after the updated data have been made available, this Regulation should enter into force on the day of its publication, HAS ADOPTED THIS REGULATION:', 'xml': '<preamble>\n <formula name="preambleFormula">\n <p>THE EUROPEAN COMMISSION,</p>\n </formula>\n <citations eId="cits_1">\n <citation eId="cits_1__cit_1">\n <p>Having regard to the Treaty on the Functioning of the European Union,</p>\n </citation>\n <citation eId="cits_1__cit_2">\n <p>Having regard to Regulation (EU) No 1308/2013 of the European Parliament and of the Council of<date date="2013-12-17">17 December 2013</date>establishing a common organisation of the markets in agricultural products and repealing Council Regulations (EEC) No 922/72, (EEC) No 234/79, (EC) No 1037/2001 and (EC) No 1234/2007<authorialNote class="FOOTNOTE" placement="bottom" GUID="E0001" NUMBERING_CONTINUED="YES"><p><ref eId="ref_1" href="/akn/eu/documentCollection/L/gu/2013-12-20/347/!main#eop_671">OJ L 347, 20.12.2013, p. 
671</ref>.</p></authorialNote>, and in particular Article 183(b) thereof,</p>\n </citation>\n <citation eId="cits_1__cit_3">\n <p>Having regard to Regulation (EU) No 510/2014 of the European Parliament and of the Council of<date date="2014-04-16">16 April 2014</date>laying down the trade arrangements applicable to certain goods resulting from the processing of agricultural products and repealing Council Regulations (EC) No 1216/2009 and (EC) No 614/2009<authorialNote class="FOOTNOTE" placement="bottom" GUID="E0002" NUMBERING_CONTINUED="YES"><p><ref eId="ref_2" href="/akn/eu/documentCollection/L/gu/2014-05-20/150/!main#eop_1">OJ L 150, 20.5.2014, p. 1</ref>.</p></authorialNote>, and in particular Article 5(6)(a) thereof,</p>\n </citation>\n </citations>\n <recitals eId="recs_1">\n <intro eId="recs_1__intro">\n <p>Whereas:</p>\n </intro>\n <recital eId="recs_1__rec_1">\n <num>(1)</num>\n <p>Commission Regulation (EC) No 1484/95<authorialNote class="FOOTNOTE" placement="bottom" GUID="E0003" NUMBERING_CONTINUED="YES"><p>Commission Regulation (EC) No 1484/95 of<date date="1995-06-28">28 June 1995</date>laying down detailed rules for implementing the system of additional import duties and fixing representative prices in the poultrymeat and egg sectors and for egg albumin, and repealing Regulation No 163/67/EEC (<ref eId="ref_3" href="/akn/eu/documentCollection/L/gu/1995-06-29/145/!main#eop_47">OJ L 145, 29.6.1995, p. 
47</ref>).</p></authorialNote>lays down detailed rules for implementing the system of additional import duties and fixes representative prices in the poultrymeat and egg sectors and for egg albumin.</p>\n </recital>\n <recital eId="recs_1__rec_2">\n <num>(2)</num>\n <p>Regular monitoring of the data used to determine representative prices for poultrymeat and egg products and for egg albumin shows that the representative import prices for certain products should be amended to take account of variations in price according to origin.</p>\n </recital>\n <recital eId="recs_1__rec_3">\n <num>(3)</num>\n <p>Regulation (EC) No 1484/95 should therefore be amended accordingly.</p>\n </recital>\n <recital eId="recs_1__rec_4">\n <num>(4)</num>\n <p>Given the need to ensure that this measure applies as soon as possible after the updated data have been made available, this Regulation should enter into force on the day of its publication,</p>\n </recital>\n </recitals>\n <block name="preamble.final">HAS ADOPTED THIS REGULATION:</block>\n </preamble>\n ', 'similarity': 0.9838415225}) (input_keys={'text'})]
[Example({'celex_id': '32016R1611', 'text': 'THE EUROPEAN COMMISSION,\n\nHaving regard to the Treaty on the Functioning of the European Union,\n\nHaving regard to the Staff Regulations of officials and Conditions of\nemployment of other servants of the European Union, laid down by Council\nRegulation (EEC, Euratom, ECSC) No 259/68\xa0[number="1",footnote="OJ L\xa056, 4.3.1968, p.\xa01, as last amended by Regulation (EC,\n Euratom) No 1023/2013 (OJ L\xa0287, 29.10.2013, p.\xa015."], and in particular\nArticle 112(2) of the Staff Regulations and Article 13 of Annex VII to\nthe Staff Regulations,\n\nAfter consulting the Staff Regulations Committee,\n\nAfter consulting the staff representatives from the institutions and\nother bodies of the European Union,\n\nWhereas:\n\n\\(1\\) In accordance with Article 13(3) of Annex VII to the Staff\nRegulations, Eurostat has submitted a report\xa0[number="2",footnote="Commission Staff Working Paper --- Eurostat Report on the 2015\n update of mission expenses (daily subsistence allowances and hotel\n ceilings) --- Ref. Ares(2015)6009670 --- 22/12/2015. 
Available\xa0at:\\\n https://circabc.europa.eu/sd/a/0bbefcd7-ef76-4825-812d-dc78be24b36b/Ares_2015_6009670_UpdateMissionExpenses.7z"] on the evolution of\nthe prices of hotels, restaurants and catering services.\n\n\\(2\\) This report shows that the daily subsistence allowances and the\nhotel ceilings should be reviewed to take account of the evolution of\nthe prices of hotels, restaurants and catering services.\n\n\\(3\\) Reviewing the scale of the daily subsistence allowance and the\nhotel ceilings involves an assessment of complex economic and/or social\nsituations, in which the legislator enjoys a wide margin of discretion.\n\n\\(4\\) The last reform of the Staff Regulations of officials and\nConditions of employment of other servants of the European Union\nunderlined the need for a particular effort by each and every public\nadministration and each and every member of its staff to improve\nefficiency and to adjust to the changing economic and social context in\nEurope.\n\n\\(5\\) Following the accession of the Republic of Croatia to the European\nUnion on 1 July 2013, the reimbursement to officials and other servants\nof expenditure incurred on missions in this country should be subject to\nthe rules set out in Article 13(2)(a) of Annex VII to the Staff\nRegulations,\n\nHAS ADOPTED THIS REGULATION:', 'preamble_text_from_xml': 'THE EUROPEAN COMMISSION, Having regard to the Treaty on the Functioning of the European Union, Having regard to the Staff Regulations of officials and Conditions of employment of other servants of the European Union, laid down by Council Regulation (EEC, Euratom, ECSC) No 259/68, and in particular Article 112(2) of the Staff Regulations and Article 13 of Annex VII to the Staff Regulations, After consulting the Staff Regulations Committee, After consulting the staff representatives from the institutions and other bodies of the European Union, Whereas: (1) In accordance with Article 13(3) of Annex VII to the Staff Regulations, Eurostat has 
submitted a report on the evolution of the prices of hotels, restaurants and catering services. (2) This report shows that the daily subsistence allowances and the hotel ceilings should be reviewed to take account of the evolution of the prices of hotels, restaurants and catering services. (3) Reviewing the scale of the daily subsistence allowance and the hotel ceilings involves an assessment of complex economic and/or social situations, in which the legislator enjoys a wide margin of discretion. (4) The last reform of the Staff Regulations of officials and Conditions of employment of other servants of the European Union underlined the need for a particular effort by each and every public administration and each and every member of its staff to improve efficiency and to adjust to the changing economic and social context in Europe. (5) Following the accession of the Republic of Croatia to the European Union on 1 July 2013, the reimbursement to officials and other servants of expenditure incurred on missions in this country should be subject to the rules set out in Article 13(2)(a) of Annex VII to the Staff Regulations,HAS ADOPTED THIS REGULATION:', 'xml': '<preamble>\n <formula name="preambleFormula">\n <p>THE EUROPEAN COMMISSION,</p>\n </formula>\n <citations eId="cits_1">\n <citation eId="cits_1__cit_1">\n <p>Having regard to the Treaty on the Functioning of the European Union,</p>\n </citation>\n <citation eId="cits_1__cit_2">\n <p>Having regard to the Staff Regulations of officials and Conditions of employment of other servants of the European Union, laid down by Council Regulation (EEC, Euratom, ECSC) No 259/68<authorialNote class="FOOTNOTE" placement="bottom" GUID="E0001"><p><ref eId="ref_1" href="/akn/eu/documentCollection/L/gu/1968-03-04/056/!main#eop_1">OJ L 56, 4.3.1968, p. 1</ref>, as last amended by Regulation (EC, Euratom) No 1023/2013 (<ref eId="ref_2" href="/akn/eu/documentCollection/L/gu/2013-10-29/287/!main#eop_15">OJ L 287, 29.10.2013, p. 
15</ref>.</p></authorialNote>, and in particular Article 112(2) of the Staff Regulations and Article 13 of Annex VII to the Staff Regulations,</p>\n </citation>\n <citation eId="cits_1__cit_3">\n <p>After consulting the Staff Regulations Committee,</p>\n </citation>\n <citation eId="cits_1__cit_4">\n <p>After consulting the staff representatives from the institutions and other bodies of the European Union,</p>\n </citation>\n </citations>\n <recitals eId="recs_1">\n <intro eId="recs_1__intro">\n <p>Whereas:</p>\n </intro>\n <recital eId="recs_1__rec_1">\n <num>(1)</num>\n <p>In accordance with Article 13(3) of Annex VII to the Staff Regulations, Eurostat has submitted a report<authorialNote class="FOOTNOTE" placement="bottom" GUID="E0002"><p>Commission Staff Working Paper — Eurostat Report on the 2015 update of mission expenses (daily subsistence allowances and hotel ceilings) — Ref. Ares(2015)6009670 —<date date="2015-12-22">22/12/2015</date>. Available at: https://circabc.europa.eu/sd/a/0bbefcd7-ef76-4825-812d-dc78be24b36b/Ares_2015_6009670_UpdateMissionExpenses.7z</p></authorialNote>on the evolution of the prices of hotels, restaurants and catering services.</p>\n </recital>\n <recital eId="recs_1__rec_2">\n <num>(2)</num>\n <p>This report shows that the daily subsistence allowances and the hotel ceilings should be reviewed to take account of the evolution of the prices of hotels, restaurants and catering services.</p>\n </recital>\n <recital eId="recs_1__rec_3">\n <num>(3)</num>\n <p>Reviewing the scale of the daily subsistence allowance and the hotel ceilings involves an assessment of complex economic and/or social situations, in which the legislator enjoys a wide margin of discretion.</p>\n </recital>\n <recital eId="recs_1__rec_4">\n <num>(4)</num>\n <p>The last reform of the Staff Regulations of officials and Conditions of employment of other servants of the European Union underlined the need for a particular effort by each and every public administration 
and each and every member of its staff to improve efficiency and to adjust to the changing economic and social context in Europe.</p>\n </recital>\n <recital eId="recs_1__rec_5">\n <num>(5)</num>\n <p>Following the accession of the Republic of Croatia to the European Union on<date date="2013-07-01">1 July 2013</date>, the reimbursement to officials and other servants of expenditure incurred on missions in this country should be subject to the rules set out in Article 13(2)(a) of Annex VII to the Staff Regulations,</p>\n </recital>\n </recitals>\n <block name="preamble.final">HAS ADOPTED THIS REGULATION:</block>\n </preamble>\n ', 'similarity': 0.9889333824000001}) (input_keys={'text'}), Example({'celex_id': '32017R0311', 'text': 'THE EUROPEAN COMMISSION,\n\nHaving regard to the Treaty on the Functioning of the European Union,\n\nHaving regard to Regulation (EU) No 1308/2013 of the European Parliament\nand of the Council of 17 December 2013 establishing a common\norganisation of the markets in agricultural products and repealing\nCouncil Regulations (EEC) No 922/72, (EEC) No 234/79, (EC) No 1037/2001\nand (EC) No 1234/2007\xa0[number="1",footnote="OJ L\xa0347, 20.12.2013, p.\xa0671."],\n\nHaving regard to Commission Implementing Regulation (EU) No 543/2011 of\n7 June 2011 laying down detailed rules for the application of Council\nRegulation (EC) No 1234/2007 in respect of the fruit and vegetables and\nprocessed fruit and vegetables sectors\xa0[number="2",footnote="OJ L\xa0157, 15.6.2011, p.\xa01."], and in particular Article\n136(1) thereof,\n\nWhereas:\n\n\\(1\\) Implementing Regulation (EU) No 543/2011 lays down, pursuant to\nthe outcome of the Uruguay Round multilateral trade negotiations, the\ncriteria whereby the Commission fixes the standard values for imports\nfrom third countries, in respect of the products and periods stipulated\nin Annex XVI, Part A thereto.\n\n\\(2\\) The standard import value is calculated each working day, in\naccordance with Article 
136(1) of Implementing Regulation (EU) No\n543/2011, taking into account variable daily data. Therefore this\nRegulation should enter into force on the day of its publication in the\n*Official Journal of the European Union*,\n\nHAS ADOPTED THIS REGULATION:', 'preamble_text_from_xml': 'THE EUROPEAN COMMISSION, Having regard to the Treaty on the Functioning of the European Union, Having regard to Regulation (EU) No 1308/2013 of the European Parliament and of the Council of 17 December 2013 establishing a common organisation of the markets in agricultural products and repealing Council Regulations (EEC) No 922/72, (EEC) No 234/79, (EC) No 1037/2001 and (EC) No 1234/2007, Having regard to Commission Implementing Regulation (EU) No 543/2011 of 7 June 2011 laying down detailed rules for the application of Council Regulation (EC) No 1234/2007 in respect of the fruit and vegetables and processed fruit and vegetables sectors, and in particular Article 136(1) thereof, Whereas: (1) Implementing Regulation (EU) No 543/2011 lays down, pursuant to the outcome of the Uruguay Round multilateral trade negotiations, the criteria whereby the Commission fixes the standard values for imports from third countries, in respect of the products and periods stipulated in Annex XVI, Part A thereto. (2) The standard import value is calculated each working day, in accordance with Article 136(1) of Implementing Regulation (EU) No 543/2011, taking into account variable daily data. 
Therefore this Regulation should enter into force on the day of its publication in the Official Journal of the European Union,HAS ADOPTED THIS REGULATION:', 'xml': '<preamble>\n <formula name="preambleFormula">\n <p>THE EUROPEAN COMMISSION,</p>\n </formula>\n <citations eId="cits_1">\n <citation eId="cits_1__cit_1">\n <p>Having regard to the Treaty on the Functioning of the European Union,</p>\n </citation>\n <citation eId="cits_1__cit_2">\n <p>Having regard to Regulation (EU) No 1308/2013 of the European Parliament and of the Council of<date date="2013-12-17">17 December 2013</date>establishing a common organisation of the markets in agricultural products and repealing Council Regulations (EEC) No 922/72, (EEC) No 234/79, (EC) No 1037/2001 and (EC) No 1234/2007<authorialNote class="FOOTNOTE" placement="bottom" GUID="E0001"><p><ref eId="ref_1" href="/akn/eu/documentCollection/L/gu/2013-12-20/347/!main#eop_671">OJ L 347, 20.12.2013, p. 671</ref>.</p></authorialNote>,</p>\n </citation>\n <citation eId="cits_1__cit_3">\n <p>Having regard to Commission Implementing Regulation (EU) No 543/2011 of<date date="2011-06-07">7 June 2011</date>laying down detailed rules for the application of Council Regulation (EC) No 1234/2007 in respect of the fruit and vegetables and processed fruit and vegetables sectors<authorialNote class="FOOTNOTE" placement="bottom" GUID="E0002"><p><ref eId="ref_2" href="/akn/eu/documentCollection/L/gu/2011-06-15/157/!main#eop_1">OJ L 157, 15.6.2011, p. 
1</ref>.</p></authorialNote>, and in particular Article 136(1) thereof,</p>\n </citation>\n </citations>\n <recitals eId="recs_1">\n <intro eId="recs_1__intro">\n <p>Whereas:</p>\n </intro>\n <recital eId="recs_1__rec_1">\n <num>(1)</num>\n <p>Implementing Regulation (EU) No 543/2011 lays down, pursuant to the outcome of the Uruguay Round multilateral trade negotiations, the criteria whereby the Commission fixes the standard values for imports from third countries, in respect of the products and periods stipulated in Annex XVI, Part A thereto.</p>\n </recital>\n <recital eId="recs_1__rec_2">\n <num>(2)</num>\n <p>The standard import value is calculated each working day, in accordance with Article 136(1) of Implementing Regulation (EU) No 543/2011, taking into account variable daily data. Therefore this Regulation should enter into force on the day of its publication in the<span class="ITALIC">Official Journal of the European Union</span>,</p>\n </recital>\n </recitals>\n <block name="preamble.final">HAS ADOPTED THIS REGULATION:</block>\n </preamble>\n ', 'similarity': 0.9834734724}) (input_keys={'text'}), Example({'celex_id': '32018R1880', 'text': 'THE EUROPEAN COMMISSION,\n\nHaving regard to the Treaty on the Functioning of the European Union,\n\nHaving regard to Council Directive 92/83/EEC of 19\xa0October 1992 on the\nharmonisation of the structures of excise duties on alcohol and\nalcoholic beverages\xa0[number="1",footnote="OJ L\xa0316, 31.10.1992, p.\xa021."], and in particular Article 27(4) thereof,\n\nWhereas:\n\n\\(1\\) Pursuant to Article 27(1)(a) of Directive 92/83/EEC, Member States\nare required to exempt from excise duty alcohol which has been\ncompletely denatured in accordance with the requirements of any Member\nState, provided that those requirements have been duly notified and\naccepted in accordance with the conditions laid down in paragraphs\xa03 and\n4 of that Article.\n\n\\(2\\) Denaturants which are employed in each Member State for 
the\npurposes of the complete denaturation of alcohol in accordance with\nArticle 27(1)(a) of Directive 92/83/EEC are described in the Annex to\nCommission Regulation (EC) No\xa03199/93\xa0[number="2",footnote="Commission Regulation (EC) No\xa03199/93 of 22\xa0November 1993 on the\n mutual recognition of procedures for the complete denaturing of\n alcohol for the purposes of exemption from excise duty (OJ L\xa0288,\n 23.11.1993, p.\xa012)."].\n\n\\(3\\) On 6\xa0June 2018, the Czech Republic communicated to the Commission\nthe denaturants which it intends to employ for the complete denaturing\nof alcohol for the purposes of Article 27(1)(a) of that Directive.\n\n\\(4\\) The Commission transmitted that communication to the other Member\nStates on 4\xa0July 2018.\n\n\\(5\\) No objections have been received by the Commission.\n\n\\(6\\) Regulation (EC) No\xa03199/93 should therefore be amended\naccordingly.\n\n\\(7\\) The measures provided for in this Regulation are in accordance\nwith the opinion of the Committee on Excise Duties,\n\nHAS ADOPTED THIS REGULATION:', 'preamble_text_from_xml': 'THE EUROPEAN COMMISSION, Having regard to the Treaty on the Functioning of the European Union, Having regard to Council Directive 92/83/EEC of 19 October 1992 on the harmonisation of the structures of excise duties on alcohol and alcoholic beverages, and in particular Article 27(4) thereof, Whereas: (1) Pursuant to Article 27(1)(a) of Directive 92/83/EEC, Member States are required to exempt from excise duty alcohol which has been completely denatured in accordance with the requirements of any Member State, provided that those requirements have been duly notified and accepted in accordance with the conditions laid down in paragraphs 3 and 4 of that Article. 
(2) Denaturants which are employed in each Member State for the purposes of the complete denaturation of alcohol in accordance with Article 27(1)(a) of Directive 92/83/EEC are described in the Annex to Commission Regulation (EC) No 3199/93 . (3) On 6 June 2018, the Czech Republic communicated to the Commission the denaturants which it intends to employ for the complete denaturing of alcohol for the purposes of Article 27(1)(a) of that Directive. (4) The Commission transmitted that communication to the other Member States on 4 July 2018. (5) No objections have been received by the Commission. (6) Regulation (EC) No 3199/93 should therefore be amended accordingly. (7) The measures provided for in this Regulation are in accordance with the opinion of the Committee on Excise Duties, HAS ADOPTED THIS REGULATION:', 'xml': '<preamble>\n <formula name="preambleFormula">\n <p>THE EUROPEAN COMMISSION,</p>\n </formula>\n <citations eId="cits_1">\n <citation eId="cits_1__cit_1">\n <p>Having regard to the Treaty on the Functioning of the European Union,</p>\n </citation>\n <citation eId="cits_1__cit_2">\n <p>Having regard to Council Directive 92/83/EEC of<date date="1992-10-19">19 October 1992</date>on the harmonisation of the structures of excise duties on alcohol and alcoholic beverages<authorialNote class="FOOTNOTE" placement="bottom" GUID="E0001"><p><ref eId="ref_1" href="/akn/eu/documentCollection/L/gu/1992-10-31/316/!main#eop_21">OJ L 316, 31.10.1992, p. 
21</ref>.</p></authorialNote>, and in particular Article 27(4) thereof,</p>\n </citation>\n </citations>\n <recitals eId="recs_1">\n <intro eId="recs_1__intro">\n <p>Whereas:</p>\n </intro>\n <recital eId="recs_1__rec_1">\n <num>(1)</num>\n <p>Pursuant to Article 27(1)(a) of Directive 92/83/EEC, Member States are required to exempt from excise duty alcohol which has been completely denatured in accordance with the requirements of any Member State, provided that those requirements have been duly notified and accepted in accordance with the conditions laid down in paragraphs 3 and 4 of that Article.</p>\n </recital>\n <recital eId="recs_1__rec_2">\n <num>(2)</num>\n <p>Denaturants which are employed in each Member State for the purposes of the complete denaturation of alcohol in accordance with Article 27(1)(a) of Directive 92/83/EEC are described in the Annex to Commission Regulation (EC) No 3199/93<authorialNote class="FOOTNOTE" placement="bottom" GUID="E0002"><p>Commission Regulation (EC) No 3199/93 of<date date="1993-11-22">22 November 1993</date>on the mutual recognition of procedures for the complete denaturing of alcohol for the purposes of exemption from excise duty (<ref eId="ref_2" href="/akn/eu/documentCollection/L/gu/1993-11-23/288/!main#eop_12">OJ L 288, 23.11.1993, p. 
12</ref>).</p></authorialNote>.</p>\n </recital>\n <recital eId="recs_1__rec_3">\n <num>(3)</num>\n <p>On<date date="2018-06-06">6 June 2018</date>, the Czech Republic communicated to the Commission the denaturants which it intends to employ for the complete denaturing of alcohol for the purposes of Article 27(1)(a) of that Directive.</p>\n </recital>\n <recital eId="recs_1__rec_4">\n <num>(4)</num>\n <p>The Commission transmitted that communication to the other Member States on<date date="2018-07-04">4 July 2018</date>.</p>\n </recital>\n <recital eId="recs_1__rec_5">\n <num>(5)</num>\n <p>No objections have been received by the Commission.</p>\n </recital>\n <recital eId="recs_1__rec_6">\n <num>(6)</num>\n <p>Regulation (EC) No 3199/93 should therefore be amended accordingly.</p>\n </recital>\n <recital eId="recs_1__rec_7">\n <num>(7)</num>\n <p>The measures provided for in this Regulation are in accordance with the opinion of the Committee on Excise Duties,</p>\n </recital>\n </recitals>\n <block name="preamble.final">HAS ADOPTED THIS REGULATION:</block>\n </preamble>\n ', 'similarity': 0.990568525}) (input_keys={'text'})]
%% Cell type:code id: tags:
``` python
# Show how many examples are in the training split (dataset.train).
len(dataset.train)
```
%% Output
50
%% Cell type:markdown id: tags:
## Step 6: Optimizing the Pipeline
%% Cell type:code id: tags:
``` python
from dspy.teleprompt import BootstrapFewShot
```
%% Cell type:code id: tags:
``` python
# Compile one few-shot-optimized pipeline per configured LLM.
# Successfully compiled programs are stored in `optimized_programs`, keyed by LLM name.
optimized_programs = {}
for selected_llm_name in llms:
    print(f"Optimizing for {selected_llm_name}")
    selected_llm = llms[selected_llm_name]
    # Switch the globally configured LM before compiling for this model.
    dspy.settings.configure(lm=selected_llm)
    logger = configure_program_compiling_logging(selected_llm_name)
    teleprompter = BootstrapFewShot(metric=validation_metric)
    try:
        optimized_model = teleprompter.compile(PreambleTransformationPipeline(), trainset=dataset.train)
    except Exception as exc:
        # Notebook-level boundary: a failure for one model (e.g. a prompt that
        # exceeds the model's context window) must not abort the optimization
        # of the remaining models. Report it and move on.
        print(f"Optimization failed for {selected_llm_name}: {type(exc).__name__}: {exc}")
        continue
    # Only successfully compiled programs are persisted and registered.
    optimized_model.save(f"../../data/preambles/p1/compiled_programs/preambles_{selected_llm_name}_few_shots.prog.json")
    optimized_programs[selected_llm_name] = optimized_model
```
%% Output
Optimizing for us.meta.llama3-2-1b-instruct-v1:0
42%|████▏ | 21/50 [00:01<00:02, 11.22it/s]
Bootstrapped 4 full traces after 22 examples in round 0.
Optimizing for us.meta.llama3-2-3b-instruct-v1:0
0%| | 0/50 [00:00<?, ?it/s]16:51:40 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= us.meta.llama3-2-3b-instruct-v1:0; provider = bedrock
16:52:02 - LiteLLM:INFO: utils.py:1002 - Wrapper: Completed Call, calling success_handler
2%|▏ | 1/50 [00:22<18:11, 22.28s/it]16:52:02 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= us.meta.llama3-2-3b-instruct-v1:0; provider = bedrock
16:52:25 - LiteLLM:INFO: utils.py:1002 - Wrapper: Completed Call, calling success_handler
4%|▍ | 2/50 [00:44<17:55, 22.41s/it]16:52:25 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= us.meta.llama3-2-3b-instruct-v1:0; provider = bedrock
16:52:44 - LiteLLM:INFO: utils.py:1002 - Wrapper: Completed Call, calling success_handler
6%|▌ | 3/50 [01:03<16:20, 20.87s/it]16:52:44 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= us.meta.llama3-2-3b-instruct-v1:0; provider = bedrock
16:53:03 - LiteLLM:INFO: utils.py:1002 - Wrapper: Completed Call, calling success_handler
8%|▊ | 4/50 [01:22<15:20, 20.02s/it]16:53:03 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= us.meta.llama3-2-3b-instruct-v1:0; provider = bedrock
16:53:18 - LiteLLM:INFO: utils.py:1002 - Wrapper: Completed Call, calling success_handler
10%|█ | 5/50 [01:38<13:48, 18.41s/it]16:53:18 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= us.meta.llama3-2-3b-instruct-v1:0; provider = bedrock
16:53:37 - LiteLLM:INFO: utils.py:1002 - Wrapper: Completed Call, calling success_handler
12%|█▏ | 6/50 [01:56<13:31, 18.43s/it]16:53:37 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= us.meta.llama3-2-3b-instruct-v1:0; provider = bedrock
16:53:55 - LiteLLM:INFO: utils.py:1002 - Wrapper: Completed Call, calling success_handler
14%|█▍ | 7/50 [02:15<13:14, 18.47s/it]16:53:55 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= us.meta.llama3-2-3b-instruct-v1:0; provider = bedrock
16:54:07 - LiteLLM:INFO: utils.py:1002 - Wrapper: Completed Call, calling success_handler
16%|█▌ | 8/50 [02:27<12:54, 18.44s/it]
Bootstrapped 4 full traces after 9 examples in round 0.
Optimizing for mistral.mistral-7b-instruct-v0:2
0%| | 0/50 [00:00<?, ?it/s]16:54:08 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
2%|▏ | 1/50 [00:01<01:16, 1.57s/it]16:54:09 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
4%|▍ | 2/50 [00:03<01:16, 1.59s/it]16:54:11 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
16:54:24 - LiteLLM:INFO: utils.py:1002 - Wrapper: Completed Call, calling success_handler
6%|▌ | 3/50 [00:15<05:13, 6.67s/it]16:54:24 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
8%|▊ | 4/50 [00:17<03:37, 4.72s/it]16:54:25 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
10%|█ | 5/50 [00:19<02:43, 3.63s/it]16:54:27 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
10%|█ | 5/50 [00:21<03:10, 4.22s/it]
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
---------------------------------------------------------------------------
HTTPStatusError Traceback (most recent call last)
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/litellm/llms/bedrock/chat/invoke_handler.py:927, in BedrockLLM.completion(self, model, messages, api_base, custom_prompt_dict, model_response, print_verbose, encoding, logging_obj, optional_params, acompletion, timeout, litellm_params, logger_fn, extra_headers, client)
926 try:
--> 927 response = self.client.post(url=proxy_endpoint_url, headers=prepped.headers, data=data) # type: ignore
928 response.raise_for_status()
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/litellm/llms/custom_httpx/http_handler.py:371, in HTTPHandler.post(self, url, data, json, params, headers, stream, timeout)
370 setattr(e, "message", e.response.text)
--> 371 raise e
372 except Exception as e:
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/litellm/llms/custom_httpx/http_handler.py:357, in HTTPHandler.post(self, url, data, json, params, headers, stream, timeout)
356 response = self.client.send(req, stream=stream)
--> 357 response.raise_for_status()
358 return response
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/httpx/_models.py:761, in Response.raise_for_status(self)
760 message = message.format(self, error_type=error_type)
--> 761 raise HTTPStatusError(message, request=request, response=self)
HTTPStatusError: Client error '400 Bad Request' for url 'https://bedrock-runtime.us-east-1.amazonaws.com/model/mistral.mistral-7b-instruct-v0:2/invoke'
For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400
During handling of the above exception, another exception occurred:
BedrockError Traceback (most recent call last)
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/litellm/main.py:2540, in completion(model, messages, timeout, temperature, top_p, n, stream, stream_options, stop, max_completion_tokens, max_tokens, presence_penalty, frequency_penalty, logit_bias, user, response_format, seed, tools, tool_choice, logprobs, top_logprobs, parallel_tool_calls, deployment_id, extra_headers, functions, function_call, base_url, api_version, api_key, model_list, **kwargs)
2539 else:
-> 2540 response = bedrock_chat_completion.completion(
2541 model=model,
2542 messages=messages,
2543 custom_prompt_dict=custom_prompt_dict,
2544 model_response=model_response,
2545 print_verbose=print_verbose,
2546 optional_params=optional_params,
2547 litellm_params=litellm_params,
2548 logger_fn=logger_fn,
2549 encoding=encoding,
2550 logging_obj=logging,
2551 extra_headers=extra_headers,
2552 timeout=timeout,
2553 acompletion=acompletion,
2554 client=client,
2555 api_base=api_base,
2556 )
2558 if optional_params.get("stream", False):
2559 ## LOGGING
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/litellm/llms/bedrock/chat/invoke_handler.py:931, in BedrockLLM.completion(self, model, messages, api_base, custom_prompt_dict, model_response, print_verbose, encoding, logging_obj, optional_params, acompletion, timeout, litellm_params, logger_fn, extra_headers, client)
930 error_code = err.response.status_code
--> 931 raise BedrockError(status_code=error_code, message=err.response.text)
932 except httpx.TimeoutException:
BedrockError: {"message":"This model's maximum context length is 32768 tokens. Please reduce the length of the prompt"}
During handling of the above exception, another exception occurred:
BadRequestError Traceback (most recent call last)
Cell In[24], line 9
7 logger = configure_program_compiling_logging(selected_llm_name)
8 teleprompter = BootstrapFewShot(metric=validation_metric)
----> 9 optimized_model = teleprompter.compile(PreambleTransformationPipeline(), trainset=dataset.train)
10 optimized_model.save(f"../../data/preambles/p1/compiled_programs/preambles_{selected_llm_name}_few_shots.prog.json")
11 optimized_programs[selected_llm_name] = optimized_model
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/teleprompt/bootstrap.py:84, in BootstrapFewShot.compile(self, student, teacher, trainset)
82 self._prepare_student_and_teacher(student, teacher)
83 self._prepare_predictor_mappings()
---> 84 self._bootstrap()
86 self.student = self._train()
87 self.student._compiled = True
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/teleprompt/bootstrap.py:154, in BootstrapFewShot._bootstrap(self, max_bootstraps)
151 break
153 if example_idx not in bootstrapped:
--> 154 success = self._bootstrap_one_example(example, round_idx)
156 if success:
157 bootstrapped[example_idx] = True
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/teleprompt/bootstrap.py:210, in BootstrapFewShot._bootstrap_one_example(self, example, round_idx)
208 current_error_count = self.error_count
209 if current_error_count >= self.max_errors:
--> 210 raise e
211 dspy.logger.error(f"Failed to run or to evaluate example {example} with {self.metric} due to {e}.")
213 if success:
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/teleprompt/bootstrap.py:190, in BootstrapFewShot._bootstrap_one_example(self, example, round_idx)
187 predictor_cache[name] = predictor.demos
188 predictor.demos = [x for x in predictor.demos if x != example]
--> 190 prediction = teacher(**example.inputs())
191 trace = dsp.settings.trace
193 for name, predictor in teacher.named_predictors():
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/primitives/program.py:26, in Module.__call__(self, *args, **kwargs)
25 def __call__(self, *args, **kwargs):
---> 26 return self.forward(*args, **kwargs)
Cell In[10], line 19, in PreambleTransformationPipeline.forward(self, text)
16 return ""
18 # Generate XML for the preamble
---> 19 xml_preamble = self.transform(text=text)
21 # Extract the desired XML part
22 extracted_xml = self.extract_xml(xml_preamble.xml)
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/primitives/program.py:26, in Module.__call__(self, *args, **kwargs)
25 def __call__(self, *args, **kwargs):
---> 26 return self.forward(*args, **kwargs)
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/predict/chain_of_thought.py:44, in ChainOfThought.forward(self, **kwargs)
41 assert self.activated in [True, False]
43 signature = kwargs.pop("new_signature", self._predict.extended_signature if self.activated else self.signature)
---> 44 return self._predict(signature=signature, **kwargs)
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/predict/predict.py:119, in Predict.__call__(self, **kwargs)
118 def __call__(self, **kwargs):
--> 119 return self.forward(**kwargs)
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/predict/predict.py:152, in Predict.forward(self, **kwargs)
150 import dspy
151 if isinstance(lm, dspy.LM):
--> 152 completions = v2_5_generate(lm, config, signature, demos, kwargs, _parse_values=self._parse_values)
153 else:
154 warn_once("\t*** In DSPy 2.5, all LM clients except `dspy.LM` are deprecated. ***\n"
155 f" \t\tYou are using the client {lm.__class__.__name__}, which will be removed in DSPy 2.6.\n"
156 " \t\tChanging the client is straightforward and will let you use new features (Adapters) that"
157 " improve the consistency of LM outputs, especially when using chat LMs. \n\n"
158 " \t\tLearn more about the changes and how to migrate at\n"
159 " \t\thttps://github.com/stanfordnlp/dspy/blob/main/examples/migration.ipynb")
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/predict/predict.py:252, in v2_5_generate(lm, lm_kwargs, signature, demos, inputs, _parse_values)
249 import dspy
250 adapter = dspy.settings.adapter or dspy.ChatAdapter()
--> 252 return adapter(lm, lm_kwargs=lm_kwargs, signature=signature, demos=demos, inputs=inputs, _parse_values=_parse_values)
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/adapters/base.py:6, in Adapter.__call__(self, lm, lm_kwargs, signature, demos, inputs, _parse_values)
3 inputs = self.format(signature, demos, inputs)
4 inputs = dict(prompt=inputs) if isinstance(inputs, str) else dict(messages=inputs)
----> 6 outputs = lm(**inputs, **lm_kwargs)
7 values = []
9 for output in outputs:
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/clients/lm.py:44, in LM.__call__(self, prompt, messages, **kwargs)
41 else:
42 completion = cached_litellm_text_completion if cache else litellm_text_completion
---> 44 response = completion(ujson.dumps(dict(model=self.model, messages=messages, **kwargs)))
45 outputs = [c.message.content if hasattr(c, "message") else c["text"] for c in response["choices"]]
47 # Logging, with removed api key & where `cost` is None on cache hit.
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/clients/lm.py:85, in cached_litellm_completion(request)
83 @functools.lru_cache(maxsize=None)
84 def cached_litellm_completion(request):
---> 85 return litellm_completion(request, cache={"no-cache": False, "no-store": False})
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/clients/lm.py:90, in litellm_completion(request, cache)
88 def litellm_completion(request, cache={"no-cache": True, "no-store": True}):
89 kwargs = ujson.loads(request)
---> 90 return litellm.completion(cache=cache, **kwargs)
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/litellm/utils.py:1071, in client.<locals>.wrapper(*args, **kwargs)
1067 if logging_obj:
1068 logging_obj.failure_handler(
1069 e, traceback_exception, start_time, end_time
1070 ) # DO NOT MAKE THREADED - router retry fallback relies on this!
-> 1071 raise e
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/litellm/utils.py:959, in client.<locals>.wrapper(*args, **kwargs)
957 print_verbose(f"Error while checking max token limit: {str(e)}")
958 # MODEL CALL
--> 959 result = original_function(*args, **kwargs)
960 end_time = datetime.datetime.now()
961 if "stream" in kwargs and kwargs["stream"] is True:
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/litellm/main.py:2957, in completion(model, messages, timeout, temperature, top_p, n, stream, stream_options, stop, max_completion_tokens, max_tokens, presence_penalty, frequency_penalty, logit_bias, user, response_format, seed, tools, tool_choice, logprobs, top_logprobs, parallel_tool_calls, deployment_id, extra_headers, functions, function_call, base_url, api_version, api_key, model_list, **kwargs)
2954 return response
2955 except Exception as e:
2956 ## Map to OpenAI Exception
-> 2957 raise exception_type(
2958 model=model,
2959 custom_llm_provider=custom_llm_provider,
2960 original_exception=e,
2961 completion_kwargs=args,
2962 extra_kwargs=kwargs,
2963 )
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/litellm/litellm_core_utils/exception_mapping_utils.py:2131, in exception_type(model, original_exception, custom_llm_provider, completion_kwargs, extra_kwargs)
2129 if exception_mapping_worked:
2130 setattr(e, "litellm_response_headers", litellm_response_headers)
-> 2131 raise e
2132 else:
2133 for error_type in litellm.LITELLM_EXCEPTION_TYPES:
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/litellm/litellm_core_utils/exception_mapping_utils.py:852, in exception_type(model, original_exception, custom_llm_provider, completion_kwargs, extra_kwargs)
850 elif original_exception.status_code == 400:
851 exception_mapping_worked = True
--> 852 raise BadRequestError(
853 message=f"BedrockException - {original_exception.message}",
854 llm_provider="bedrock",
855 model=model,
856 response=original_exception.response,
857 )
858 elif original_exception.status_code == 404:
859 exception_mapping_worked = True
BadRequestError: litellm.BadRequestError: BedrockException - {"message":"This model's maximum context length is 32768 tokens. Please reduce the length of the prompt"}
%% Cell type:code id: tags:
``` python
from dspy.evaluate import Evaluate
```
%% Cell type:code id: tags:
``` python
# Evaluate every optimized program on the dev split of the dataset.
devset = dataset.dev

# The evaluator does not depend on the program under test, so build it once.
evaluate = Evaluate(devset=devset, metric=validation_metric, num_threads=4, display_progress=True, display_table=0)

for optimized_program_name, optimized_program in optimized_programs.items():
    # Print the name, not the program object: its repr is a multi-line signature dump.
    print(f"Evaluating {optimized_program_name}")

    # Programs are keyed by the LLM name they were optimized for. Select that
    # LM explicitly; otherwise the evaluation runs against whichever LM was
    # configured last (possibly one whose optimization failed).
    dspy.settings.configure(lm=llms[optimized_program_name])

    # Evaluate the compiled pipeline
    results = evaluate(optimized_program)

    print(f"Results for {optimized_program_name}: {results}%")
```
%% Output
Evaluating transform = Predict(StringSignature(text -> reasoning, xml
instructions='Create an XML representation of a document preamble section in the Akoma Ntoso (AKN) format. It is composed of formula, citations and recitals.'
text = Field(annotation=str required=True json_schema_extra={'desc': 'Raw text format of the document preamble section', '__dspy_field_type': 'input', 'prefix': 'Text:'})
reasoning = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${reasoning}', '__dspy_field_type': 'output'})
xml = Field(annotation=str required=True json_schema_extra={'desc': 'Akoma Ntoso (AKN) XML representation of the input preamble', '__dspy_field_type': 'output', 'prefix': 'Xml:'})
))
0%| | 0/30 [00:00<?, ?it/s]16:56:16 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
16:56:16 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
16:56:16 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
16:56:16 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Average Metric: 0.0 / 1 (0.0): 3%|▎ | 1/30 [00:27<13:08, 27.18s/it]16:56:43 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
Average Metric: 0.0 / 2 (0.0): 7%|▋ | 2/30 [00:48<11:03, 23.71s/it]16:57:05 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
Average Metric: 0.0 / 3 (0.0): 10%|█ | 3/30 [01:19<12:06, 26.90s/it]16:57:35 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
Average Metric: 0.0 / 4 (0.0): 13%|█▎ | 4/30 [02:26<18:38, 43.00s/it]16:58:43 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
16:58:53 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
16:58:57 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
16:59:00 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
16:59:05 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
16:59:05 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
16:59:13 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
16:59:14 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
16:59:20 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
16:59:25 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
16:59:27 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
16:59:30 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
16:59:32 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
16:59:33 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
16:59:33 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
16:59:41 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
16:59:42 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
16:59:42 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
16:59:44 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
16:59:44 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
16:59:49 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
16:59:49 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
16:59:49 - LiteLLM:INFO: utils.py:3055 -
LiteLLM completion() model= mistral.mistral-7b-instruct-v0:2; provider = bedrock
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
Give Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new
LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.
---------------------------------------------------------------------------
HTTPStatusError Traceback (most recent call last)
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/litellm/llms/bedrock/chat/invoke_handler.py:927, in BedrockLLM.completion(self, model, messages, api_base, custom_prompt_dict, model_response, print_verbose, encoding, logging_obj, optional_params, acompletion, timeout, litellm_params, logger_fn, extra_headers, client)
926 try:
--> 927 response = self.client.post(url=proxy_endpoint_url, headers=prepped.headers, data=data) # type: ignore
928 response.raise_for_status()
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/litellm/llms/custom_httpx/http_handler.py:371, in HTTPHandler.post(self, url, data, json, params, headers, stream, timeout)
370 setattr(e, "message", e.response.text)
--> 371 raise e
372 except Exception as e:
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/litellm/llms/custom_httpx/http_handler.py:357, in HTTPHandler.post(self, url, data, json, params, headers, stream, timeout)
356 response = self.client.send(req, stream=stream)
--> 357 response.raise_for_status()
358 return response
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/httpx/_models.py:761, in Response.raise_for_status(self)
760 message = message.format(self, error_type=error_type)
--> 761 raise HTTPStatusError(message, request=request, response=self)
HTTPStatusError: Client error '400 Bad Request' for url 'https://bedrock-runtime.us-east-1.amazonaws.com/model/mistral.mistral-7b-instruct-v0:2/invoke'
For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400
During handling of the above exception, another exception occurred:
BedrockError Traceback (most recent call last)
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/litellm/main.py:2540, in completion(model, messages, timeout, temperature, top_p, n, stream, stream_options, stop, max_completion_tokens, max_tokens, presence_penalty, frequency_penalty, logit_bias, user, response_format, seed, tools, tool_choice, logprobs, top_logprobs, parallel_tool_calls, deployment_id, extra_headers, functions, function_call, base_url, api_version, api_key, model_list, **kwargs)
2539 else:
-> 2540 response = bedrock_chat_completion.completion(
2541 model=model,
2542 messages=messages,
2543 custom_prompt_dict=custom_prompt_dict,
2544 model_response=model_response,
2545 print_verbose=print_verbose,
2546 optional_params=optional_params,
2547 litellm_params=litellm_params,
2548 logger_fn=logger_fn,
2549 encoding=encoding,
2550 logging_obj=logging,
2551 extra_headers=extra_headers,
2552 timeout=timeout,
2553 acompletion=acompletion,
2554 client=client,
2555 api_base=api_base,
2556 )
2558 if optional_params.get("stream", False):
2559 ## LOGGING
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/litellm/llms/bedrock/chat/invoke_handler.py:931, in BedrockLLM.completion(self, model, messages, api_base, custom_prompt_dict, model_response, print_verbose, encoding, logging_obj, optional_params, acompletion, timeout, litellm_params, logger_fn, extra_headers, client)
930 error_code = err.response.status_code
--> 931 raise BedrockError(status_code=error_code, message=err.response.text)
932 except httpx.TimeoutException:
BedrockError: {"message":"This model's maximum context length is 32768 tokens. Please reduce the length of the prompt"}
During handling of the above exception, another exception occurred:
BadRequestError Traceback (most recent call last)
Cell In[27], line 10
7 evaluate = Evaluate(devset=devset, metric=validation_metric, num_threads=4, display_progress=True, display_table=0)
9 # Evaluate the compiled pipeline
---> 10 results = evaluate(optimized_program)
12 print(f"Results for {optimized_program_name}: {results}%")
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/evaluate/evaluate.py:215, in Evaluate.__call__(self, program, metric, devset, num_threads, display_progress, display_table, return_all_scores, return_outputs)
213 reordered_devset, ncorrect, ntotal = self._execute_single_thread(wrapped_program, devset, display_progress)
214 else:
--> 215 reordered_devset, ncorrect, ntotal = self._execute_multi_thread(
216 wrapped_program,
217 devset,
218 num_threads,
219 display_progress,
220 )
222 dspy.logger.info(f"Average Metric: {ncorrect} / {ntotal} ({round(100 * ncorrect / ntotal, 1)}%)")
224 predicted_devset = sorted(reordered_devset)
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/evaluate/evaluate.py:124, in Evaluate._execute_multi_thread(self, wrapped_program, devset, num_threads, display_progress)
121 pbar = tqdm.tqdm(total=len(devset), dynamic_ncols=True, disable=not display_progress)
123 for future in as_completed(futures):
--> 124 example_idx, example, prediction, score = future.result()
126 # use the cancelled_job literal to check if the job was cancelled - use "is" not "=="
127 # in case the prediction is "cancelled" for some reason.
128 if prediction is job_cancelled:
File /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/concurrent/futures/_base.py:438, in Future.result(self, timeout)
436 raise CancelledError()
437 elif self._state == FINISHED:
--> 438 return self.__get_result()
440 self._condition.wait(timeout)
442 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
File /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/concurrent/futures/_base.py:390, in Future.__get_result(self)
388 if self._exception:
389 try:
--> 390 raise self._exception
391 finally:
392 # Break a reference cycle with the exception in self._exception
393 self = None
File /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/concurrent/futures/thread.py:52, in _WorkItem.run(self)
49 return
51 try:
---> 52 result = self.fn(*self.args, **self.kwargs)
53 except BaseException as exc:
54 self.future.set_exception(exc)
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/evaluate/evaluate.py:117, in Evaluate._execute_multi_thread.<locals>.cancellable_wrapped_program(idx, arg)
115 if self.cancel_jobs.is_set():
116 return None, None, job_cancelled, None
--> 117 return wrapped_program(idx, arg)
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/evaluate/evaluate.py:193, in Evaluate.__call__.<locals>.wrapped_program(example_idx, example)
191 current_error_count = self.error_count
192 if current_error_count >= self.max_errors:
--> 193 raise e
195 if self.provide_traceback:
196 dspy.logger.error(
197 f"Error for example in dev set: \t\t {e}\n\twith inputs:\n\t\t{example.inputs()}\n\nStack trace:\n\t{traceback.format_exc()}"
198 )
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/evaluate/evaluate.py:175, in Evaluate.__call__.<locals>.wrapped_program(example_idx, example)
172 thread_stacks[threading.get_ident()] = list(dspy.settings.main_stack)
174 try:
--> 175 prediction = program(**example.inputs())
176 score = metric(
177 example,
178 prediction,
179 ) # FIXME: TODO: What's the right order? Maybe force name-based kwargs!
181 # increment assert and suggest failures to program's attributes
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/primitives/program.py:26, in Module.__call__(self, *args, **kwargs)
25 def __call__(self, *args, **kwargs):
---> 26 return self.forward(*args, **kwargs)
Cell In[10], line 19, in PreambleTransformationPipeline.forward(self, text)
16 return ""
18 # Generate XML for the preamble
---> 19 xml_preamble = self.transform(text=text)
21 # Extract the desired XML part
22 extracted_xml = self.extract_xml(xml_preamble.xml)
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/primitives/program.py:26, in Module.__call__(self, *args, **kwargs)
25 def __call__(self, *args, **kwargs):
---> 26 return self.forward(*args, **kwargs)
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/predict/chain_of_thought.py:44, in ChainOfThought.forward(self, **kwargs)
41 assert self.activated in [True, False]
43 signature = kwargs.pop("new_signature", self._predict.extended_signature if self.activated else self.signature)
---> 44 return self._predict(signature=signature, **kwargs)
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/predict/predict.py:119, in Predict.__call__(self, **kwargs)
118 def __call__(self, **kwargs):
--> 119 return self.forward(**kwargs)
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/predict/predict.py:152, in Predict.forward(self, **kwargs)
150 import dspy
151 if isinstance(lm, dspy.LM):
--> 152 completions = v2_5_generate(lm, config, signature, demos, kwargs, _parse_values=self._parse_values)
153 else:
154 warn_once("\t*** In DSPy 2.5, all LM clients except `dspy.LM` are deprecated. ***\n"
155 f" \t\tYou are using the client {lm.__class__.__name__}, which will be removed in DSPy 2.6.\n"
156 " \t\tChanging the client is straightforward and will let you use new features (Adapters) that"
157 " improve the consistency of LM outputs, especially when using chat LMs. \n\n"
158 " \t\tLearn more about the changes and how to migrate at\n"
159 " \t\thttps://github.com/stanfordnlp/dspy/blob/main/examples/migration.ipynb")
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/predict/predict.py:252, in v2_5_generate(lm, lm_kwargs, signature, demos, inputs, _parse_values)
249 import dspy
250 adapter = dspy.settings.adapter or dspy.ChatAdapter()
--> 252 return adapter(lm, lm_kwargs=lm_kwargs, signature=signature, demos=demos, inputs=inputs, _parse_values=_parse_values)
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/adapters/base.py:6, in Adapter.__call__(self, lm, lm_kwargs, signature, demos, inputs, _parse_values)
3 inputs = self.format(signature, demos, inputs)
4 inputs = dict(prompt=inputs) if isinstance(inputs, str) else dict(messages=inputs)
----> 6 outputs = lm(**inputs, **lm_kwargs)
7 values = []
9 for output in outputs:
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/clients/lm.py:44, in LM.__call__(self, prompt, messages, **kwargs)
41 else:
42 completion = cached_litellm_text_completion if cache else litellm_text_completion
---> 44 response = completion(ujson.dumps(dict(model=self.model, messages=messages, **kwargs)))
45 outputs = [c.message.content if hasattr(c, "message") else c["text"] for c in response["choices"]]
47 # Logging, with removed api key & where `cost` is None on cache hit.
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/clients/lm.py:85, in cached_litellm_completion(request)
83 @functools.lru_cache(maxsize=None)
84 def cached_litellm_completion(request):
---> 85 return litellm_completion(request, cache={"no-cache": False, "no-store": False})
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/dspy/clients/lm.py:90, in litellm_completion(request, cache)
88 def litellm_completion(request, cache={"no-cache": True, "no-store": True}):
89 kwargs = ujson.loads(request)
---> 90 return litellm.completion(cache=cache, **kwargs)
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/litellm/utils.py:1071, in client.<locals>.wrapper(*args, **kwargs)
1067 if logging_obj:
1068 logging_obj.failure_handler(
1069 e, traceback_exception, start_time, end_time
1070 ) # DO NOT MAKE THREADED - router retry fallback relies on this!
-> 1071 raise e
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/litellm/utils.py:959, in client.<locals>.wrapper(*args, **kwargs)
957 print_verbose(f"Error while checking max token limit: {str(e)}")
958 # MODEL CALL
--> 959 result = original_function(*args, **kwargs)
960 end_time = datetime.datetime.now()
961 if "stream" in kwargs and kwargs["stream"] is True:
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/litellm/main.py:2957, in completion(model, messages, timeout, temperature, top_p, n, stream, stream_options, stop, max_completion_tokens, max_tokens, presence_penalty, frequency_penalty, logit_bias, user, response_format, seed, tools, tool_choice, logprobs, top_logprobs, parallel_tool_calls, deployment_id, extra_headers, functions, function_call, base_url, api_version, api_key, model_list, **kwargs)
2954 return response
2955 except Exception as e:
2956 ## Map to OpenAI Exception
-> 2957 raise exception_type(
2958 model=model,
2959 custom_llm_provider=custom_llm_provider,
2960 original_exception=e,
2961 completion_kwargs=args,
2962 extra_kwargs=kwargs,
2963 )
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/litellm/litellm_core_utils/exception_mapping_utils.py:2131, in exception_type(model, original_exception, custom_llm_provider, completion_kwargs, extra_kwargs)
2129 if exception_mapping_worked:
2130 setattr(e, "litellm_response_headers", litellm_response_headers)
-> 2131 raise e
2132 else:
2133 for error_type in litellm.LITELLM_EXCEPTION_TYPES:
File ~/dev/work/playground/.venv/lib/python3.9/site-packages/litellm/litellm_core_utils/exception_mapping_utils.py:852, in exception_type(model, original_exception, custom_llm_provider, completion_kwargs, extra_kwargs)
850 elif original_exception.status_code == 400:
851 exception_mapping_worked = True
--> 852 raise BadRequestError(
853 message=f"BedrockException - {original_exception.message}",
854 llm_provider="bedrock",
855 model=model,
856 response=original_exception.response,
857 )
858 elif original_exception.status_code == 404:
859 exception_mapping_worked = True
BadRequestError: litellm.BadRequestError: BedrockException - {"message":"This model's maximum context length is 32768 tokens. Please reduce the length of the prompt"}
source diff could not be displayed: it is too large. Options to address this: view the blob.
source diff could not be displayed: it is too large. Options to address this: view the blob.
source diff could not be displayed: it is too large. Options to address this: view the blob.
%% Cell type:code id: tags:
``` python
%load_ext autoreload
%autoreload 2
```
%% Cell type:code id: tags:
``` python
import os
import sys
import pandas as pd
# Resolve the directory the notebook kernel is currently running in.
current_dir = os.getcwd()
# Make the parent of that directory importable by appending it to sys.path.
parent_dir = os.path.dirname(current_dir)
sys.path.append(parent_dir)
# Star-import the project helpers from the `functions` module
# (presumably located in parent_dir — verify).
from functions import *
```
%% Cell type:code id: tags:
``` python
# Load the dataset metadata table from ../data/genai4lex_dataset_info.csv into df.
df = pd.read_csv("../data/genai4lex_dataset_info.csv")
```
%% Cell type:code id: tags:
``` python
# Preview the first rows of df as a quick sanity check of the loaded columns.
df.head()
```
%% Output
celex_id Act Type Publication Date CELEX Number Language \
0 32016R2088 regulation/REG_IMPL 2016-11-29 32016R2088 eng
1 32020R0194 regulation/REG_IMPL 2020-02-12 32020R0194 eng
2 32017R1595 regulation/REG_IMPL 2017-09-21 32017R1595 eng
3 32017R1581 regulation/REG_IMPL 2017-09-18 32017R1581 eng
4 32020R0180 regulation/REG_IMPL 2020-02-07 32020R0180 eng
Series Type Total Pages Estimated Pages
0 L 1 1.0
1 L 4 5.0
2 L 3 2.0
3 L 1 1.0
4 L 2 1.0
%% Cell type:code id: tags:
``` python
# Base directories of the dataset files. xml_docs_dir and md_docs_dir are
# combined with a celex_id by plain string concatenation
# (dir + celex_id + extension), so the trailing '/' is required.
word_docs_dir = '../data/genai4lex_word_docs/'
xml_docs_dir = '../data/genai4lex_dataset/valid/'
md_docs_dir = '../data/genai4lex_md_docs/'
```
%% Cell type:code id: tags:
``` python
# For each entry in df, pair the 'preface' part of its XML document with the
# preface text extracted from the matching markdown file.
#
# Results (both keyed by celex_id):
#   prefaces_found   -> {'text', 'xml_content', 'xml'} for every usable pair
#   prefaces_missing -> {'reason', ...} for every entry that failed a step
# Both dicts are written as JSON files under ../data/prefaces/ at the end.
import json

prefaces_found = {}
prefaces_missing = {}
count_found = 0
count_missing = 0

for celex_id in df['celex_id']:
    xml_document_path = xml_docs_dir + celex_id + '.xml'
    if not os.path.exists(xml_document_path):
        # Entries without an XML file are skipped silently: they are neither
        # printed nor recorded in prefaces_missing.
        continue

    preface_content = extract_document_part(xml_document_path, 'preface')
    if preface_content is None:
        print(f"Preface not found in XML for: {celex_id}")
        prefaces_missing[celex_id] = {
            'reason': 'Preface not found in XML'
        }
        continue

    # Locate the markdown file for the same celex_id, then extract its content.
    markdown_document_path = md_docs_dir + celex_id + '.md'
    if not os.path.exists(markdown_document_path):
        print(f"Markdown file not found for: {celex_id}")
        prefaces_missing[celex_id] = {
            'reason': 'Markdown file not found'
        }
        continue

    # Explicit encoding: the documents contain non-ASCII text, so do not rely
    # on the platform default.
    with open(markdown_document_path, 'r', encoding='utf-8') as file:
        markdown_content = file.read()
    # Replace non-breaking spaces with regular spaces before extraction.
    markdown_content = markdown_content.replace('\u00A0', ' ')

    # TODO: check how extract_preface is implemented, include all results,
    # and let the sim results determine which files have issues.
    preface_text_extracted = extract_preface(markdown_content)
    if preface_text_extracted is None:
        print(f"No preface for:{celex_id}: {markdown_content[:10]}")
        prefaces_missing[celex_id] = {
            'reason': 'No preface found in markdown',
            'markdown_content_sample': markdown_content[:100]
        }
        # NOTE(review): this is the only failure branch that increments
        # count_missing; the other branches record a reason but are not
        # counted — confirm this is intended.
        count_missing += 1
        continue

    # Strip every <...> tag to obtain the plain-text form of the XML preface.
    xml_text = re.sub(r'<[^>]+>', '', preface_content)
    prefaces_found[celex_id] = {
        'text': preface_text_extracted,
        'xml_content': xml_text,
        'xml': preface_content,
    }
    count_found += 1

print(f"Found prefaces: {count_found}")
print(f"Not found prefaces: {count_missing}")

# Save the successfully paired prefaces to a JSON file.
with open('../data/prefaces/prefaces_found.json', 'w') as json_file:
    json.dump(prefaces_found, json_file, indent=2)

# Save the failed entries, with the reason for each, to a JSON file.
with open('../data/prefaces/prefaces_missing.json', 'w') as json_file:
    json.dump(prefaces_missing, json_file, indent=2)
```
%% Output
XML file not found for: 32020R0194
No preface for:32017R1581: *ANNEX*
+
XML file not found for: 32019R1198
XML file not found for: 32020R1249
XML file not found for: 32017R0648
XML file not found for: 32019L0633
XML file not found for: 32017R1542
XML file not found for: 32020R1275
XML file not found for: 32017R2063
XML file not found for: 32019R0251
XML file not found for: 32019R0906
XML file not found for: 32019R1155R(01)
XML file not found for: 42019X2141
XML file not found for: 32020R2224
XML file not found for: 42019X0795
XML file not found for: 32018L1713
No preface for:32019R1890R(01): ![](media/
XML file not found for: 32019R2122
XML file not found for: 32016R1191
XML file not found for: 32017R1185
XML file not found for: 32016R1185
XML file not found for: 42018X0178
XML file not found for: 32018R1563
XML file not found for: 32017R1146
No preface for:32019R0127: REGULATION
No preface for:32017R2467: ![](media/
XML file not found for: 32018R2042
XML file not found for: 32019L0790
XML file not found for: 32019R0480
No preface for:32017R0391R(01): CORRIGENDA
No preface for:32018R0774R(01): CORRIGENDA
No preface for:32017R0390R(01): CORRIGENDA
XML file not found for: 32018R1013
No preface for:32017R1350: *ANNEX*
+
No preface for:32017L2455R(01): ![](media/
XML file not found for: 32019R0331
XML file not found for: 32019R2240
XML file not found for: 32018R0330
XML file not found for: 32018R0456
XML file not found for: 32019R2241
XML file not found for: 32018R1012
No preface for:32017R1351: EN
L_201[
No preface for:32018R0318: REGULATION
XML file not found for: 32018R2043
XML file not found for: 32017R0271
XML file not found for: 32017R2300
No preface for:32017R2314: *ANNEX*
#
No preface for:32016R1628R(02): ![](media/
XML file not found for: 32018R0640
XML file not found for: 32018R0683
No preface for:32017R1970R(01): ![](media/
XML file not found for: 32018R0046
XML file not found for: 32019R2123
XML file not found for: 42018X0780
XML file not found for: 32016R1033
XML file not found for: 42020X0486
XML file not found for: 32020R2225
No preface for:32017R0366R(02): CORRIGENDA
No preface for:32012R0267R(08): ![](media/
No preface for:32018R0317R(01): CORRIGENDA
No preface for:32017R1796: *ANNEX*
No preface for:32017R0107: *ANNEX*
+
XML file not found for: 32019R0250
XML file not found for: 32019R0244
XML file not found for: 32018R1628
XML file not found for: 32016R0891
XML file not found for: 32017R0649
XML file not found for: 32017R0891
XML file not found for: 32019R0287
XML file not found for: 32017R2089
No preface for:32017R1580: *ANNEX*
+
XML file not found for: 32017R0852
No preface for:32017R1582: *ANNEX*
+
No preface for:32016R2095R(01): CORRIGENDA
XML file not found for: 32017R0754R(01)
XML file not found for: 32019R1602
XML file not found for: 32016L2370
No preface for:32019R0244R(01): CORRIGENDA
No preface for:32018R1285R(01): ![](media/
XML file not found for: 32018R0252
No preface for:32018R0848R(02): CORRIGENDA
XML file not found for: 32019L1937
No preface for:32017R1958: *ANNEX*
Q
XML file not found for: 32019R0905
No preface for:32015L0412R(03): ![](media/
XML file not found for: 42019X2142
XML file not found for: 32017R1970
XML file not found for: 32020R2227
XML file not found for: 32017R1019
XML file not found for: 32018R0871
XML file not found for: 32018R1212
XML file not found for: 32020R0550
No preface for:32018R2069: ![](media/
XML file not found for: 32019R0124
XML file not found for: 32017R0746R(02)
No preface for:32017R0978R(01): CORRIGENDA
XML file not found for: 32016R1384
No preface for:32017R1384: *ANNEX*
#
XML file not found for: 32015R2424R(05)
No preface for:32012R0528R(08): ![](media/
XML file not found for: 32019R0440
XML file not found for: 32019R2242
XML file not found for: 32019R0455
XML file not found for: 32018R0455
XML file not found for: 32017R2101
No preface for:32017R1434: *ANNEX II*
XML file not found for: 32016R1346
No preface for:32016R1103R(01): ![](media/
XML file not found for: 32016R2303
XML file not found for: 32018R0643
XML file not found for: 32019R0125
XML file not found for: 32017R0272
XML file not found for: 32019R1213
XML file not found for: 32016R1150
No preface for:32018R0120R(02): ![](media/
No preface for:32017R1398R(01): ![](media/
XML file not found for: 32017R2459
XML file not found for: 32018R0858
No preface for:32018R0286R(01): CORRIGENDA
XML file not found for: 32019R0694
XML file not found for: 32017R1187
XML file not found for: 32020R0592
XML file not found for: 32020R1713
XML file not found for: 32018R0045
XML file not found for: 32017R0460
XML file not found for: 32019L0369
XML file not found for: 32018R0723
XML file not found for: 32017R1018
XML file not found for: 32020R0357
XML file not found for: 32016L0943
XML file not found for: 32017R1795
XML file not found for: 32017R0699
XML file not found for: 32020R1298
No preface for:32014L0065R(09): ![](media/
XML file not found for: 32017R1578
XML file not found for: 42018X1857
XML file not found for: 32017L1132
XML file not found for: 32020R2034
No preface for:32016R1346R(01): CORRIGENDA
No preface for:32019R1198R(01): CORRIGENDA
XML file not found for: 32017R0458
XML file not found for: 32020R0353
XML file not found for: 42018X0630
XML file not found for: 32018R0647
No preface for:32016R2307: *ANNEX*
+
XML file not found for: 42018X1704
XML file not found for: 32016R1395
XML file not found for: 32017R0712
XML file not found for: 32019R1997
XML file not found for: 32019R0479
XML file not found for: 32019R1014
XML file not found for: 32020R2155
XML file not found for: 32016R1357
XML file not found for: 32019R1996
No preface for:32018R0034R(02): CORRIGENDA
XML file not found for: 32018R1969
XML file not found for: 42018X1705
XML file not found for: 32016R0908
No preface for:31990L0270R(03): ![](media/
XML file not found for: 32017R1141
XML file not found for: 32018R1570
XML file not found for: 32016R1627
No preface for:32016R1169: *ANNEX*
+
XML file not found for: 32018R0120
No preface for:32016R2306: *ANNEX*
+
XML file not found for: 32020R0568
No preface for:32018R0939R(01): CORRIGENDA
No preface for:32018R1506R(01): CORRIGENDA
XML file not found for: 32019R2092
XML file not found for: 32016R1035
XML file not found for: 32017R0459
XML file not found for: 32019R2131
No preface for:32014R0600R(05): ![](media/
No preface for:32019R0817R(01): ![](media/
No preface for:32016R1009: ANNEX
# I
XML file not found for: 32019R0915
XML file not found for: 32017L0952
XML file not found for: 32016R1237
No preface for:32018L0822R(03): ![](media/
XML file not found for: 32019R0530
No preface for:32003R2201R(15): CORRIGENDA
XML file not found for: 32019R0297
No preface for:32017R1590: *ANNEX*
+
No preface for:32013L0036R(02): ПОПРАВКИ
XML file not found for: 32017R0117
XML file not found for: 32016R2072
XML file not found for: 32020R2037
XML file not found for: 32017R1962
XML file not found for: 32018R0056
No preface for:32016R2264: EN
L_201[
XML file not found for: 32020R2221
XML file not found for: 32020R0422
XML file not found for: 32016R1037
XML file not found for: 32019R1374
No preface for:32012R0652R(01): CORRIGENDA
XML file not found for: 32019R2090
XML file not found for: 32019R0687
XML file not found for: 32019R0877
No preface for:32017R2382R(01): CORRIGENDA
XML file not found for: 32018R0644
XML file not found for: 32020R0230
XML file not found for: 42018X1707
XML file not found for: 32017R0705
XML file not found for: 32017R1369
No preface for:32017R1084R(02): CORRIGENDA
No preface for:32016R0341R(03): CORRIGENDA
XML file not found for: 32018R1017
XML file not found for: 32017R2107
XML file not found for: 42018X1706
XML file not found for: 32020R0972
XML file not found for: 32020R0966
No preface for:32017R2454R(01): CORRIGENDA
XML file not found for: 32016R1624
XML file not found for: 32018R1229
XML file not found for: 32019R0123
No preface for:32016R1181: *ANNEX*
#
XML file not found for: 32019R0876
XML file not found for: 32016R1036
XML file not found for: 32020R2220
No preface for:32016R0780R(01): CORRIGENDA
XML file not found for: 32017R1778
XML file not found for: 32019R2126
XML file not found for: 32020R0379
XML file not found for: 32016R1778
No preface for:32019R0907R(02): CORRIGENDA
No preface for:32018R1497R(01): CORRIGENDU
XML file not found for: 32016R1977
XML file not found for: 32017R0658
XML file not found for: 32017R0880
XML file not found for: 32019L2034
XML file not found for: 32018R0255
XML file not found for: 32017R0830
No preface for:32014R0596R(03): ![](media/
No preface for:32019R0934R(02): CORRIGENDA
No preface for:32019R0798R(01): CORRIGENDA
No preface for:32018R1845: **Regulati
XML file not found for: 32019R1662
XML file not found for: 32016R1247
XML file not found for: 32016R1253
No preface for:32018R0848R(04): CORRIGENDA
XML file not found for: 32020R1216
XML file not found for: 32019R0554
XML file not found for: 32017R1938
No preface for:32018R0231R(01): **CORRIGEN
XML file not found for: 32018R0959
XML file not found for: 32018R0781
No preface for:32020R0217R(01): **CORRIGEN
XML file not found for: 32020R0444
XML file not found for: 32018R0756
XML file not found for: 32017R0373
XML file not found for: 32017R0367
No preface for:32016R1870: *ANNEX*
#
No preface for:32015L2366R(05): ![](media/
XML file not found for: 32020R1148
XML file not found for: 32017R1131
No preface for:32017R1119: EN
L_201[
XML file not found for: 32018R2035
XML file not found for: 32019R2035
XML file not found for: 32017R0575
XML file not found for: 32018R0408
XML file not found for: 32017R0763
No preface for:32016L2258: ![](media/
XML file not found for: 32017R0005
XML file not found for: 32019R2237
XML file not found for: 42018X1947
XML file not found for: 32019R2236
No preface for:32014R1357R(02): CORRIGENDA
XML file not found for: 32018R1065
XML file not found for: 32018L0725
No preface for:32020R0900: ![](media/
XML file not found for: 32017R1497
XML file not found for: 32018R2034
No preface for:32017R1118: *ANNEX*
+
XML file not found for: 32019R2020
XML file not found for: 32019R1267
No preface for:32019R1295R(02): CORRIGENDA
No preface for:32018R0033R(02): Corrigenda
XML file not found for: 32018R0186
XML file not found for: 32017R0366
XML file not found for: 32020R1001
No preface for:32013R0952R(03): ![](media/
XML file not found for: 32019L1995
XML file not found for: 32018R1475
XML file not found for: 32018R1139
XML file not found for: 32018L0645
No preface for:32019R1677: Regulation
XML file not found for: 32016R1246
XML file not found for: 32019R1688
XML file not found for: 32017R0825
No preface for:32018R0574R(01): CORRIGENDA
XML file not found for: 32020R0123
XML file not found for: 32020R1229
XML file not found for: 32020R1215
XML file not found for: 32018R0231
XML file not found for: 32018R0782
XML file not found for: 32019R2181
XML file not found for: 32018R1488
XML file not found for: 32019R0033
XML file not found for: 32017R2215
No preface for:32017R0745R(01): ![](media/
XML file not found for: 32019R2197R(03)
No preface for:32018R1517: Commission
No preface for:32016R2349: *ANNEX*
+
No preface for:32020R0533: REGULATION
XML file not found for: 32020R0527
XML file not found for: 32019R1259
XML file not found for: 32018R0153
XML file not found for: 32017R0576
XML file not found for: 32019R0621
XML file not found for: 32017R2361
XML file not found for: 32019R2022
No preface for:32016R1668: *ANNEX*
+
XML file not found for: 32019R1926
XML file not found for: 32017R0748
XML file not found for: 32019R1701
XML file not found for: 32019R1715
XML file not found for: 32017R2177
No preface for:32013R0604R(01): ![](media/
XML file not found for: 32016R0561R(02)
XML file not found for: 32019R1099
No preface for:32018R0683R(01): CORRIGENDA
XML file not found for: 32017R1480
XML file not found for: 32019R2023
No preface for:32017R2406: *ANNEX*
+
XML file not found for: 32017R2360
XML file not found for: 32020R0526
No preface for:32016R2348: *ANNEX*
+
XML file not found for: 32020R0532
No preface for:32017R1568R(01): ![](media/
Markdown file not found for: 32016R1872
No preface for:32018R2025R(01): ![](media/
XML file not found for: 32019R0807
XML file not found for: 32017R0371
XML file not found for: 32016R2214
XML file not found for: 32020R1758
XML file not found for: 32017L2110
XML file not found for: 32019R1966R(01)
XML file not found for: 32018R0783
XML file not found for: 32019R2180
No preface for:32017R0141R(01): CORRIGENDA
No preface for:32017R0326R(01): CORRIGENDA
XML file not found for: 42019X0253
XML file not found for: 32020R2084
XML file not found for: 32019R2199R(01)
XML file not found for: 32020R0873
XML file not found for: 32020R0697
No preface for:32017R1916: *ANNEX*
+
XML file not found for: 32019R0787
XML file not found for: 32018R1300
XML file not found for: 32020R0442
XML file not found for: 32016R1731
Markdown file not found for: 32019R2033R(02)
No preface for:32011L0035R(03): ![](media/
No preface for:32016R1692: *ANNEX*
+
XML file not found for: 32019R0817
XML file not found for: 32019R2033
XML file not found for: 32019R0624
XML file not found for: 32007R1441R(09)
XML file not found for: 32019R1248
XML file not found for: 32018R2033
XML file not found for: 32017R0567
XML file not found for: 32019L0520
XML file not found for: 32015L1480R(01)
XML file not found for: 32019R0618
No preface for:32017R1137: *ANNEX*
+
XML file not found for: 32019L1158
XML file not found for: 42018X1997
XML file not found for: 32019R1089
XML file not found for: 32014L0047R(02)
XML file not found for: 42020X0031
XML file not found for: 32016R1447
XML file not found for: 32019R2219
XML file not found for: 32016R0341R(05)
XML file not found for: 32018R1711
XML file not found for: 32019R1705
XML file not found for: 32018R1077
XML file not found for: 32017L2455R(06)
XML file not found for: 32016R0957
No preface for:32018R0705R(01): ![](media/
No preface for:32016R1821R(03): CORRIGENDA
XML file not found for: 32020R0523
XML file not found for: 32017R2359
No preface for:32019L1159: ![](media/
XML file not found for: 32019R2026
XML file not found for: 32017R0566
XML file not found for: 32019R0631
XML file not found for: 32019R0625
XML file not found for: 32020R1173
XML file not found for: 32019R0816
XML file not found for: 32016R1724
XML file not found for: 32017R2205
XML file not found for: 32019R1329
XML file not found for: 32018R0976
No preface for:31973R0558R(02): CORRIGENDA
XML file not found for: 32016R1903
XML file not found for: 32016R1240
No preface for:32019R1685R(01): CORRIGENDA
XML file not found for: 32018R0221
XML file not found for: 32020R0696
XML file not found for: 32019R1842
XML file not found for: 32019R1840
XML file not found for: 32020R0870
XML file not found for: 32019R1868
XML file not found for: 32017R0821
XML file not found for: 32017R0612
XML file not found for: 32018R0545
XML file not found for: 32016R2005
No preface for:32016R0958R(01): CORRIGENDA
XML file not found for: 32017R1915
No preface for:32017R1154R(01): CORRIGENDA
XML file not found for: 32018R0784
XML file not found for: 32018R0974
XML file not found for: 32017R0389
No preface for:32017R1097: *ANNEX*
+
XML file not found for: 32020R0469
XML file not found for: 32016R1726
XML file not found for: 32020R1818
XML file not found for: 32017L0853
XML file not found for: 32018R0196
XML file not found for: 32020R1824
No preface for:32016R1691: *ANNEX*
+
XML file not found for: 32019R2024
No preface for:32016R1824R(01): CORRIGENDA
XML file not found for: 32019R0627
XML file not found for: 32019R2018
XML file not found for: 32020R0521
No preface for:32017R1120: *ANNEX*
+
XML file not found for: 32017R0969
XML file not found for: 42020X0032
No preface for:32018R1605R(01): ![](media/
XML file not found for: 32017R1478
XML file not found for: 32017R1444
XML file not found for: 32019R1706
XML file not found for: 32017R2158
XML file not found for: 32020R0736
XML file not found for: 32018R1712
XML file not found for: 32020R0044
XML file not found for: 32019R1715R(01)
XML file not found for: 32018R1048
XML file not found for: 32019R0424
XML file not found for: 32017R0001
XML file not found for: 32018R0395
XML file not found for: 32016R1647
No preface for:32014L0090R(05): ![](media/
XML file not found for: 42018X0862
XML file not found for: 32019R2019
XML file not found for: 32013R0575R(04)
XML file not found for: 32016R2372
XML file not found for: 32017R0217
XML file not found for: 32018R2025
XML file not found for: 32017R2400
XML file not found for: 32019R0626
XML file not found for: 32020R0508
XML file not found for: 32017R0565
XML file not found for: 32018R0140
No preface for:32016R1690: *ANNEX*
+
No preface for:32016R1104R(01): ![](media/
XML file not found for: 32019R0829
No preface for:32016R1733: *ANNEX*
R
No preface for:32016L0798R(05): ![](media/
XML file not found for: 32016L2102
No preface for:31985R2799R(02): CORRIGENDA
XML file not found for: 32020R1992
XML file not found for: 32017R2206
No preface for:32016R0424R(01): ![](media/
XML file not found for: 32016R1928
No preface for:32017R1096: *ANNEX*
+
XML file not found for: 32018R0975
XML file not found for: 32018R0785
XML file not found for: 32019R2186
XML file not found for: 42019X0057
XML file not found for: 32018L2057
XML file not found for: 32019R1666
XML file not found for: 32019R1896
No preface for:32018R0848R(05): ![](media/
XML file not found for: 32018R1882
XML file not found for: 42020X0176
XML file not found for: 42020X0638
XML file not found for: 32017R1514
XML file not found for: 32019R1131
XML file not found for: 32020R1579
No preface for:32017R0144: *ANNEX*
#
XML file not found for: 32018R1119
No preface for:32013R0665R(02): CORRIGENDA
XML file not found for: 32018R0788
XML file not found for: 32020R1784
XML file not found for: 32019R1333
XML file not found for: 32019L1023
XML file not found for: 32018R1469
XML file not found for: 32019R2160
XML file not found for: 32018R0763
XML file not found for: 32020R1182
XML file not found for: 32017R0583
XML file not found for: 32019R0818
XML file not found for: 32018R1253
XML file not found for: 32016R1104
No preface for:32017R2419: *ANNEX*
+
XML file not found for: 32019R0159
XML file not found for: 32016R1824R(02)
XML file not found for: 32020R1627
XML file not found for: 32019R2014
No preface for:32016R0965: *ANNEX*
+
XML file not found for: 42018X1998
XML file not found for: 32018R1092
XML file not found for: 32019R0429
No preface for:32017R0757: *ANNEX*
#
XML file not found for: 32017R2154
XML file not found for: 32020R0712
XML file not found for: 32018R1722
XML file not found for: 32018R0978R(01)
XML file not found for: 32016R0958
XML file not found for: 32019R1939
XML file not found for: 32020R0909
XML file not found for: 32019R2015
XML file not found for: 32016R1139
XML file not found for: 32020R1140
XML file not found for: 32017R0421
XML file not found for: 32018R0762
XML file not found for: 32018R1468
XML file not found for: 32020R0464
No preface for:32019R1706R(01): CORRIGENDA
No preface for:32017R2225R(01): ![](media/
No preface for:32017R1918: *ANNEX*
#
XML file not found for: 32019R0945
No preface for:32013L0059R(04): ![](media/
XML file not found for: 32018R0574
XML file not found for: 32020R0128
XML file not found for: 32020R0699
XML file not found for: 32017R0804
XML file not found for: 32019R1681
No preface for:32020R0213R(01): ![](media/
XML file not found for: 32019R1873
XML file not found for: 42019X0273
XML file not found for: 32020R2098
No preface for:32017R1517: *ANNEX II*
XML file not found for: 32020R0658
XML file not found for: 32019R0576
XML file not found for: 32020R0894
No preface for:32017R1098: *ANNEX*
+
XML file not found for: 32017R1932
XML file not found for: 32017R0392
No preface for:32016R1715: *ANNEX*
#
No preface for:32019R0352R(01): ![](media/
XML file not found for: 32017R0423
XML file not found for: 32019R2163
XML file not found for: 32019R0833
XML file not found for: 32019R0628
XML file not found for: 32019R1250
XML file not found for: 32020R1156
XML file not found for: 32019R2017
XML file not found for: 32018R1091
No preface for:32016R0966: *ANNEX*
+
XML file not found for: 32017R2195
No preface for:32016R0999: *ANNEX*
+
XML file not found for: 32019R2201
No preface for:32017L2455R(05): CORRIGENDA
No preface for:32018R0832R(01): CORRIGENDA
XML file not found for: 32019L0713
XML file not found for: 32019R1735
XML file not found for: 32016L2284
No preface for:32016R0967: *ANNEX*
+
XML file not found for: 32017L1564
XML file not found for: 32019R2016
Markdown file not found for: 32017R2341
XML file not found for: 32017R0218
XML file not found for: 32019R1286
XML file not found for: 32020R1816
XML file not found for: 32018R0775
No preface for:32016R1728: *ANNEX*
+
XML file not found for: 32018R0761
XML file not found for: 32017R0422
XML file not found for: 32020R0473
XML file not found for: 32018R0985
No preface for:32017R1099: *ANNEX*
+
No preface for:31972R1473R(06): CORRIGENDA
XML file not found for: 32019R0952
XML file not found for: 32020R0881
No preface for:32016R0466R(02): ![](media/
XML file not found for: 32016L1148
No preface for:32017L2455: ![](media/
XML file not found for: 42019X0272
XML file not found for: 32020R2099
XML file not found for: 32017R1566R(01)
XML file not found for: 32020R1590
XML file not found for: 32017R0185
XML file not found for: 32018R1866
XML file not found for: 32018R1862
No preface for:32014L0023R(03): ![](media/
XML file not found for: 32020R0852
XML file not found for: 32019L0878
XML file not found for: 32020R0885
XML file not found for: 32016L1164
XML file not found for: 32018R0573
No preface for:32016R0466R(03): ![](media/
XML file not found for: 32017L2109
XML file not found for: 32019R0765
XML file not found for: 32016R1076
XML file not found for: 32019R0759
XML file not found for: 32019R0981
XML file not found for: 32020R0477
XML file not found for: 32017R0591
No preface for:32017R2386: *ANNEX*
+
XML file not found for: 32018R0188
No preface for:32018R0553R(01): CORRIGENDA
No preface for:32004R0723R(08): ![](media/
No preface for:32016R2351: *ANNEX*
+
XML file not found for: 32017R0220
XML file not found for: 32018R0163
XML file not found for: 32020R0271
No preface for:32017R1116: EN
L_201[
No preface for:32019R0033R(02): CORRIGENDA
XML file not found for: 42018X1593
XML file not found for: 32019R1916
XML file not found for: 32016R0963
XML file not found for: 32016R1328
No preface for:32016R0988: ANNEX
+--
XML file not found for: 32019R2238
XML file not found for: 32018R1725
XML file not found for: 32019R2239
XML file not found for: 32018R1724
XML file not found for: 32017R0745
XML file not found for: 32016R1329
XML file not found for: 32019R0360
XML file not found for: 32017R0989
No preface for:32016R0976: *ANNEX*
+
XML file not found for: 42018X1592
XML file not found for: 32020R0502
XML file not found for: 32016R1103
XML file not found for: 32020R0270
XML file not found for: 32019R2007
No preface for:32016R2350: *ANNEX*
+
XML file not found for: 32018R0823
No preface for:32017R2387: EN
L_201[
XML file not found for: 32020R1191
No preface for:32016R1705: Regulation
XML file not found for: 32016R1077
XML file not found for: 32019L1024
XML file not found for: 32017L2108
XML file not found for: 32017R2230
XML file not found for: 32019R0943
XML file not found for: 32020R1783
XML file not found for: 32020R0112
XML file not found for: 32019R1122
XML file not found for: 32019R0566
XML file not found for: 32020R1230
XML file not found for: 32020R1224
XML file not found for: 32017R0625
XML file not found for: 32020R0884
XML file not found for: 32019R1687
XML file not found for: 32019L0879
XML file not found for: 32019R1693
XML file not found for: 32019R2199R(02)
XML file not found for: 32018R1263R(01)
XML file not found for: 32020R0851
XML file not found for: 32018R1861
XML file not found for: 32019R1685
XML file not found for: 32020L1687
No preface for:32018R1565R(01): CORRIGENDA
XML file not found for: 32018R1108
XML file not found for: 32017R0141
XML file not found for: 32014R1368R(01)
XML file not found for: 32020R0104
XML file not found for: 32016L1629
XML file not found for: 32020R1781
XML file not found for: 32017L1371
XML file not found for: 32017R2232
XML file not found for: 32018R0028
No preface for:32012R0036R(04): RETIFICAÇÕ
XML file not found for: 32016R1075
XML file not found for: 32017R2391
No preface for:32013R1302R(01): ![](media/
No preface for:32016R1854: *ANNEX*
+
No preface for:32013R1303R(01): **CORRIGEN
No preface for:32017R2385: *ANNEX*
+
XML file not found for: 32019R1295
No preface for:32018R0583R(01): CORRIGENDA
XML file not found for: 32017R1129
No preface for:32015R0220R(02): CORRIGENDA
XML file not found for: 32017R0579
XML file not found for: 32018R0148
No preface for:32019R0216R(02): ![](media/
XML file not found for: 32017R0747
XML file not found for: 32018R1726
XML file not found for: 32019R1726
XML file not found for: 32020R2100
XML file not found for: 32017R2179
XML file not found for: 32018R1727
XML file not found for: 32017R0746
No preface for:32016R0975: *ANNEX*
+
No preface for:32017R1570R(01): CORRIGENDA
XML file not found for: 42018X1591
No preface for:32016R1824R(03): CORRIGENDA
No preface for:32017R2421: *ANNEX*
+
XML file not found for: 32017R1128
No preface for:32011R1169R(12): ![](media/
XML file not found for: 32018R0161
XML file not found for: 32018R0607
No preface for:32016R2347: *ANNEX*
+
No preface for:32016R1896: *ANNEX*
Q
XML file not found for: 32019R0834
No preface for:32019R1295R(01): CORRIGENDA
XML file not found for: 32017R0587
XML file not found for: 32017R1538R(01)
No preface for:32018R1832R(05): CORRIGENDA
No preface for:32017R0907R(01): ![](media/
XML file not found for: 32018L1972
XML file not found for: 42019X1354
XML file not found for: 32018R1121
No preface for:32017R1509R(03): ![](media/
XML file not found for: 32020R0105
XML file not found for: 32017R1510
XML file not found for: 32016R2031
XML file not found for: 32020R0893
XML file not found for: 32018R1684
XML file not found for: 32018R1690
XML file not found for: 32018R1860
XML file not found for: 42020X1223
XML file not found for: 32016R2081
XML file not found for: 32018R1807
XML file not found for: 42018X1858
XML file not found for: 32017R1563
XML file not found for: 32018R1146
XML file not found for: 32020R2013
No preface for:32014L0065R(06): ![](media/
No preface for:32017R0659R(01): CORRIGENDA
XML file not found for: 32017R0127
XML file not found for: 32019R0099
XML file not found for: 32019R1387
XML file not found for: 32019R0026R(01)
XML file not found for: 42020X1021
XML file not found for: 32020R1081
No preface for:32018R0049R(01): CORRIGENDA
Markdown file not found for: 32017R2297
XML file not found for: 32019R1344
XML file not found for: 32018L0410
No preface for:32006R0561R(07): RETIFICAÇÕ
XML file not found for: 32019R2117
XML file not found for: 32019R0072
XML file not found for: 32019R0714
XML file not found for: 42020X1169
XML file not found for: 32019R2088
XML file not found for: 32017R1542R(01)
No preface for:32016R2308: *ANNEX*
+
XML file not found for: 32016R1167
XML file not found for: 32020R0572
XML file not found for: 42019X1723
XML file not found for: 32020R1485
No preface for:32016L1629R(01): ![](media/
No preface for:32016R0044R(01): CORRIGENDA
No preface for:32017R0708: *ANNEX*
+
No preface for:32017R1370: ![](media/
XML file not found for: 32019L0001
XML file not found for: 32018R1999
XML file not found for: 32019R2016R(01)
XML file not found for: 32019R1782
No preface for:32019R1716R(01): **Corrigen
XML file not found for: 32019R0661
XML file not found for: 32017R2321
XML file not found for: 32016R1628
No preface for:32019R0113: ## **REGUL
XML file not found for: 32018R0649
No preface for:32017R2454R(02): ![](media/
XML file not found for: 32020R1686
XML file not found for: 32019R0073
No preface for:32016R0314R(01): CORRIGENDA
XML file not found for: 32019R0715
XML file not found for: 32019R0067
XML file not found for: 32019R1379
XML file not found for: 32017R0330
No preface for:32017R1760: *ANNEX*
#
XML file not found for: 32016R1012
XML file not found for: 32020R1080
No preface for:32016R1047R(01): CORRIGENDA
XML file not found for: 32019R0517
XML file not found for: 32017R1238
XML file not found for: 32017R0654
XML file not found for: 32020L1151
XML file not found for: 32019R0050R(01)
XML file not found for: 32018R1806
XML file not found for: 32017R0867
XML file not found for: 32016R0867
XML file not found for: 32013R1385R(02)
XML file not found for: 32019R1838
XML file not found for: 32016R0865
XML file not found for: 32018L0822
XML file not found for: 32019R1804
No preface for:32015R0063R(03): CORRIGENDA
XML file not found for: 32020R1294
XML file not found for: 32017R0118
No preface for:32020R2004: REGULATION
XML file not found for: 32017R2055
XML file not found for: 32019R0515
XML file not found for: 32018R0273
XML file not found for: 32017R0656
XML file not found for: 32019R1384
No preface for:31989L0666R(02): ![](media/
XML file not found for: 32019R0059
XML file not found for: 32020R1069
XML file not found for: 32018R0688
XML file not found for: 32019R0688
XML file not found for: 32019R0887
XML file not found for: 32020R0571
XML file not found for: 32020R0217
XML file not found for: 32018R1541
XML file not found for: 32016R2337
XML file not found for: 32020R0559
No preface for:32008R1272R(17): ![](media/
XML file not found for: 32017R0087
XML file not found for: 32016R1400
XML file not found for: 32019L0771
XML file not found for: 32016R2134
XML file not found for: 32020R1478
XML file not found for: 32019L0770
XML file not found for: 32017R0079
XML file not found for: 32016R1401
XML file not found for: 32017R0086
XML file not found for: 32018R1971
XML file not found for: 32016L0844R(01)
No preface for:32017R0841R(01): CORRIGENDA
XML file not found for: 32018R1795
XML file not found for: 32019R1781
XML file not found for: 32020R1646
XML file not found for: 32018R0676
XML file not found for: 32016R2336
XML file not found for: 32018R0886
XML file not found for: 32019R0886
XML file not found for: 32017R1171
XML file not found for: 42018X0629
XML file not found for: 32017R1993
XML file not found for: 32019R2115
XML file not found for: 32020L0700
No preface for:31972R1369R(01): CORRIGENDA
XML file not found for: 32019R2129
XML file not found for: 32016R1011
XML file not found for: 32016R1777
No preface for:32015R1986R(02): CORRIGENDA
XML file not found for: 32017R1005
XML file not found for: 32016R2281
XML file not found for: 32018R0931
XML file not found for: 32020R1530
XML file not found for: 32016R0799R(01)
XML file not found for: 32020R1524
No preface for:32020R2011: REGULATION
XML file not found for: 32018R1805
XML file not found for: 32019R1839
XML file not found for: 32016R0860
XML file not found for: 32017L1852
XML file not found for: 32017R0135
XML file not found for: 32020R2015
XML file not found for: 32019R1154
XML file not found for: 32017R0109
XML file not found for: 32016R2285
XML file not found for: 32018R0921
No preface for:32016R2246: *ANNEX*
#
XML file not found for: 32019R2105
XML file not found for: 32019R0712
No preface for:32017R2252: ![](media/
XML file not found for: 32017R1997
No preface for:32016L0798R(01): ![](media/
No preface for:32016R1773: *ANNEX*
No preface for:32006R1801R(02): ![](media/
XML file not found for: 32019R1587
XML file not found for: 32018R1236
XML file not found for: 32017R2468
XML file not found for: 32016R1388
XML file not found for: 32019R0464
XML file not found for: 32020R2161
XML file not found for: 32019R1020
XML file not found for: 32016R2119
XML file not found for: 32019L2162
No preface for:32017R0068: Commission
XML file not found for: 32019R1021
XML file not found for: 32020R0039
XML file not found for: 32019R0317
No preface for:32019R0818R(01): CORRIGENDA
XML file not found for: 32018R0471
XML file not found for: 32019R1948
XML file not found for: 42019X1724
XML file not found for: 32019R1784
No preface for:32016R1103R(02): ![](media/
XML file not found for: 32018R1974
XML file not found for: 32020R0561
XML file not found for: 32017R2469
XML file not found for: 32018R2058
XML file not found for: 32020R0213
XML file not found for: 32018R1551
No preface for:32018R2070: ![](media/
XML file not found for: 32017R0242
XML file not found for: 32018R1579
XML file not found for: 32017R0295
XML file not found for: 32020R1694
XML file not found for: 32018R0049
XML file not found for: 32017R0336
XML file not found for: 32017R1982
XML file not found for: 32019R1394
No preface for:32014R0901R(01): CORRIGENDA
No preface for:32018L2002: ![](media/
XML file not found for: 32017R1570
XML file not found for: 32020R2014
XML file not found for: 32016L2341
No preface for:32019R1155: ![](media/
XML file not found for: 32015L0996R(01)
No preface for:32018R0511: ![](media/
XML file not found for: 32018R0277
No preface for:32017R1018R(02): CORRIGENDA
XML file not found for: 42020X0110
XML file not found for: 42019X1120
XML file not found for: 42020X1597
XML file not found for: 32020R2002
XML file not found for: 32017R0678
XML file not found for: 32019R1382
XML file not found for: 32016R2251
XML file not found for: 32017R1994
No preface for:32017R2226R(03): ![](media/
XML file not found for: 32019R0856
XML file not found for: 32019R1590
XML file not found for: 32019R2099
XML file not found for: 32019R1584
XML file not found for: 32018R2066
XML file not found for: 32018R0671
XML file not found for: 32018R0659
XML file not found for: 32018R1221
XML file not found for: 32016L1919
No preface for:32017R0095: ANNEX
# P
XML file not found for: 32018R1976
XML file not found for: 32019L0944
XML file not found for: 32017R0724
XML file not found for: 32019R0473
XML file not found for: 32020R2176
XML file not found for: 32020R0761
XML file not found for: 32018R0329
XML file not found for: 32020R2162
XML file not found for: 32019R1022
XML file not found for: 32019L2161
No preface for:32018L0957R(01): SPROSTOWAN
No preface for:32016R0841R(03): ![](media/
No preface for:32017R1349: EN
ANNEX
XML file not found for: 32019R0472
XML file not found for: 32018L1695R(01)
XML file not found for: 32017R0094
XML file not found for: 42019X0405
XML file not found for: 32019L0789
XML file not found for: 32018R2067
XML file not found for: 32013R1019R(02)
XML file not found for: 32017R1188
XML file not found for: 42018X0798
XML file not found for: 32017R1759
XML file not found for: 32017R1981
XML file not found for: 32016R2250
XML file not found for: 32018R0076
No preface for:32017R0484: *ANNEX*
+
No preface for:32017R0366R(01): CORRIGENDA
XML file not found for: 32018R1624
XML file not found for: 32019R1156
XML file not found for: 32020R0600
XML file not found for: 32017R0679
No preface for:32017R1795R(01): CORRIGENDA
No preface for:32014L0065R(07): ![](media/
XML file not found for: 32020L0262
XML file not found for: 32018R1618
XML file not found for: 42020X1596
XML file not found for: 32018R0260
XML file not found for: 32020R0199
Found prefaces: 2905
Not found prefaces: 223
%% Cell type:markdown id: tags:
### TODO: extract doc number from IMMC file
We opt to extract the number from the XML file instead: the IMMC file contains multiple doc numbers, and we cannot tell which one is the correct one.
%% Cell type:code id: tags:
``` python
# This code is not used: there are multiple doc numbers in the IMMC file and we cannot tell which one is correct.
# We'll use the document number from the XML file instead.
# Add these imports at the top of your notebook if not already present
# import os
# import xml.etree.ElementTree as ET
# # Function to extract document number from IMMC files using XPath
# def extract_document_number(file_path):
# tree = ET.parse(file_path)
# root = tree.getroot()
# # Find all 'cm:extension' elements of the specific type and extract 'ojext:official_number'
# extension_tags = root.findall(".//cm:extension[@xsi:type='ojext:PrintProductionFileActWorkExtensionType']", namespaces={
# 'cm': 'http://publications.europa.eu/resource/core-metadata',
# 'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
# 'ojext': 'urn:eu:oj:extensions'
# })
# # Extract all official numbers
# official_numbers = [ext.find(".//ojext:official_number", namespaces={'ojext': 'urn:eu:oj:extensions'}) for ext in extension_tags if ext.find(".//ojext:official_number", namespaces={'ojext': 'urn:eu:oj:extensions'}) is not None]
# official_numbers = [num.text for num in official_numbers if num is not None]
# # Get the latest official number (assuming the last one in the list is the latest; adjust if necessary based on other criteria)
# latest_official_number = official_numbers[-1] if official_numbers else None
# return latest_official_number
# # Directory containing IMMC files
# # doc_number = root.find('.//cmt:transmission', namespace).get('id')
# # if doc_number is not None:
# # return doc_number
# # return None
# # Directory containing IMMC files
# immc_dir = 'data/genai4lex_immc/'
# # Create a dictionary to store document numbers
# document_numbers = {}
# # Extract document numbers from IMMC files
# for filename in os.listdir(immc_dir):
# if filename.endswith('.xml'):
# celex_id = filename.split('.')[0]
# file_path = os.path.join(immc_dir, filename)
# doc_number = extract_document_number(file_path)
# if doc_number:
# document_numbers[celex_id] = doc_number
# print(f"{celex_id}: {doc_number}")
```
%% Cell type:code id: tags:
``` python
!pip install lxml tqdm beautifulsoup4 scikit-learn
```
%% Output
Requirement already satisfied: lxml in /Users/nasredine/dev/work/playground/.venv/lib/python3.9/site-packages (5.2.2)
Requirement already satisfied: tqdm in /Users/nasredine/dev/work/playground/.venv/lib/python3.9/site-packages (4.66.4)
Requirement already satisfied: beautifulsoup4 in /Users/nasredine/dev/work/playground/.venv/lib/python3.9/site-packages (4.12.3)
Requirement already satisfied: scikit-learn in /Users/nasredine/dev/work/playground/.venv/lib/python3.9/site-packages (1.5.1)
Requirement already satisfied: soupsieve>1.2 in /Users/nasredine/dev/work/playground/.venv/lib/python3.9/site-packages (from beautifulsoup4) (2.5)
Requirement already satisfied: numpy>=1.19.5 in /Users/nasredine/dev/work/playground/.venv/lib/python3.9/site-packages (from scikit-learn) (2.0.0)
Requirement already satisfied: scipy>=1.6.0 in /Users/nasredine/dev/work/playground/.venv/lib/python3.9/site-packages (from scikit-learn) (1.13.1)
Requirement already satisfied: joblib>=1.2.0 in /Users/nasredine/dev/work/playground/.venv/lib/python3.9/site-packages (from scikit-learn) (1.3.2)
Requirement already satisfied: threadpoolctl>=3.1.0 in /Users/nasredine/dev/work/playground/.venv/lib/python3.9/site-packages (from scikit-learn) (3.5.0)
[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: pip install --upgrade pip
%% Cell type:code id: tags:
``` python
import json
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from difflib import SequenceMatcher
```
%% Cell type:code id: tags:
``` python
# Compute, for every preface, how similar the raw text is to the text content
# of its XML once the tags are removed, and collect the results in `df`.
# NOTE(review): `re`, `BeautifulSoup` and `SequenceMatcher` are not imported in
# this cell — presumably they come from earlier cells; verify.
import string
import pandas as pd
data = []  # one (celex_id, original text, xml, similarity) tuple per document
# NOTE(review): two consecutive `for` headers over different dicts — this looks
# like both sides of a rename (successful_prefaces -> prefaces_found); confirm
# which dict is the intended source.
for celex_id, content in successful_prefaces.items():
for celex_id, content in prefaces_found.items():
text_original = content['text']  # kept untouched for the output DataFrame
text = content['text']  # working copy that gets normalised below
xml = content['xml']
# Remove tags from XML
soup = BeautifulSoup(xml, 'xml')
xml_text = soup.get_text()
# Collapse every run of whitespace (new lines, tabs, spaces) into one space
text = re.sub(r'\s+', ' ', text).strip()
xml_text = re.sub(r'\s+', ' ', xml_text).strip()
# Normalize texts and convert to lower case
text = text.lower()
xml_text = xml_text.lower()
# Remove all whitespace and punctuation: `\W` matches every non-word
# character, so only letters, digits and '_' survive. This supersedes the
# whitespace collapsing above, and the extra .lower() is a no-op because both
# strings were already lower-cased.
text = re.sub(r'\W', '', text.lower())
xml_text = re.sub(r'\W', '', xml_text.lower())
# Compute similarity using SequenceMatcher; the first argument is its "junk" predicate.
# NOTE(review): after the `\W` strip the only character this predicate can
# still match is '_' — confirm whether the predicate is still needed.
similarity = SequenceMatcher(lambda x: x in string.punctuation + string.whitespace , text, xml_text).ratio()
data.append((celex_id, text_original, xml, similarity))
# Create DataFrame indexed by CELEX id
df = pd.DataFrame(data, columns=['celex_id', 'text', 'xml', 'similarity'])
df.set_index('celex_id', inplace=True)
```
%% Cell type:code id: tags:
``` python
# Extract the document number from each preface's XML and add it to `df` as a
# `doc_number` column.
doc_numbers = []  # one (celex_id, doc_number or None) tuple per document
# NOTE(review): two consecutive `for` headers over different dicts — this looks
# like both sides of a rename (successful_prefaces -> prefaces_found); confirm
# which dict is the intended source.
for celex_id, content in successful_prefaces.items():
for celex_id, content in prefaces_found.items():
xml = content['xml']
# Parse XML to find document number
soup = BeautifulSoup(xml, 'xml')
# Only the first <docNumber> element is used; None when there is none.
# NOTE(review): the dataset listing shows 32017L0738 mapped to 2009/48, so the
# first <docNumber> may not always be the act's own number — verify.
doc_number_tag = soup.find('docNumber')
doc_number = doc_number_tag.get_text() if doc_number_tag else None
doc_numbers.append((celex_id, doc_number))
# Create DataFrame for document numbers, indexed by CELEX id like `df`
df_doc_numbers = pd.DataFrame(doc_numbers, columns=['celex_id', 'doc_number'])
df_doc_numbers.set_index('celex_id', inplace=True)
# Merge the two DataFrames index-on-index; `how` is left at the pandas default
# (inner join), so only CELEX ids present in both frames are kept.
df = df.merge(df_doc_numbers, left_index=True, right_index=True)
```
%% Cell type:code id: tags:
``` python
print(f"Average similarity: {df['similarity'].mean()}")
```
%% Output
Average similarity: 0.9730024117039671
%% Cell type:code id: tags:
``` python
# Print first row
print(df.iloc[0])
```
%% Output
text Commission Implementing Regulation (EU) 2016/....
xml <preface>\n <longTitle>\n <p>\n ...
similarity 0.979452
doc_number 2016/2088
Name: 32016R2088, dtype: object
%% Cell type:code id: tags:
``` python
# Rank every document from the most to the least similar text/XML pair.
df_sorted = df.sort_values('similarity', ascending=False)

# Report both ends of the ranking (similarity column only), then the mean.
print("Top 10 documents with highest similarity:")
print(df_sorted[['similarity']].head(10))

print("\nBottom 10 documents with lowest similarity:")
print(df_sorted[['similarity']].tail(10))

print(f"\nAverage similarity: {df['similarity'].mean():.4f}")
```
%% Output
Top 10 documents with highest similarity:
similarity
celex_id
32019R0168 0.997125
32020R2007 0.996767
32018R0917 0.996283
32016R0861 0.995925
32017R0394 0.995851
32017R0841 0.995745
32019R0707 0.995540
32017R2393 0.995208
32019R0230 0.994965
32016R0950 0.994914
Bottom 10 documents with lowest similarity:
similarity
celex_id
32019R0365 0.162257
32018R0122 0.148297
32020R1226 0.145522
32019R0157 0.143590
32018R0066 0.127946
32019R2199 0.099068
32018R0830 0.029810
32017R2016 0.011055
32018R0295 0.000706
32017L0738 0.000595
Average similarity: 0.9730
%% Cell type:code id: tags:
``` python
# Print text and xml of row 32019R0168 (the highest-similarity document in the
# ranking printed by the previous cell)
row = df.loc['32019R0168']
print(f"text: {row['text']}")
print(f"xml: {row['xml']}")
```
%% Output
text: COMMISSION IMPLEMENTING REGULATION (EU) 2019/...
of 31 January 2019
amending Implementing Regulation (EU) No 540/2011 as regards the
extension of the approval periods of the active substances abamectin,
*Bacillus subtilis* (Cohn 1872) Strain QST 713, *Bacillus thuringiensis*
subsp. Aizawai, *Bacillus thuringiensis* subsp. israeliensis, *Bacillus
thuringiensis* subsp. kurstaki, *Beauveria bassiana*, benfluralin,
clodinafop, clopyralid, *Cydia pomonella Granulovirus* (CpGV),
cyprodinil, dichlorprop-P, epoxiconazole, fenpyroximate, fluazinam,
flutolanil, fosetyl, *Lecanicillium muscarium*, mepanipyrim, mepiquat,
*Metarhizium anisopliae* var. Anisopliae, metconazole, metrafenone,
*Phlebiopsis gigantea*, pirimicarb, *Pseudomonas chlororaphis* strain:
MA 342, pyrimethanil*, Pythium oligandrum*, rimsulfuron, spinosad,
*Streptomyces* K61, thiacloprid, tolclofos-methyl, *Trichoderma
asperellum*, *Trichoderma atroviride*, *Trichoderma gamsii*,
*Trichoderma harzianum*, triclopyr, trinexapac, triticonazole,
*Verticillium albo-atrum* and ziram
(Text with EEA relevance)
xml: <preface>
<longTitle>
<p>
<span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2019/168</docNumber></span>
</p>
<p>of<docDate date="2019-01-31">31 January 2019</docDate></p>
<p>amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances abamectin,<span class="ITALIC">Bacillus subtilis</span>(Cohn 1872) Strain QST 713,<span class="ITALIC">Bacillus thuringiensis</span>subsp. Aizawai,<span class="ITALIC">Bacillus thuringiensis</span>subsp. israeliensis,<span class="ITALIC">Bacillus thuringiensis</span>subsp. kurstaki,<span class="ITALIC">Beauveria bassiana</span>, benfluralin, clodinafop, clopyralid,<span class="ITALIC">Cydia pomonella Granulovirus</span>(CpGV), cyprodinil, dichlorprop-P, epoxiconazole, fenpyroximate, fluazinam, flutolanil, fosetyl,<span class="ITALIC">Lecanicillium muscarium</span>, mepanipyrim, mepiquat,<span class="ITALIC">Metarhizium anisopliae</span>var. Anisopliae, metconazole, metrafenone,<span class="ITALIC">Phlebiopsis gigantea</span>, pirimicarb,<span class="ITALIC">Pseudomonas chlororaphis</span>strain: MA 342, pyrimethanil,<span class="ITALIC">Pythium oligandrum</span>, rimsulfuron, spinosad,<span class="ITALIC">Streptomyces</span>K61, thiacloprid, tolclofos-methyl,<span class="ITALIC">Trichoderma asperellum</span>, <span class="ITALIC">Trichoderma atroviride</span>, <span class="ITALIC">Trichoderma gamsii</span>, <span class="ITALIC">Trichoderma harzianum</span>, triclopyr, trinexapac, triticonazole,<span class="ITALIC">Verticillium albo-atrum</span>and ziram</p>
<p>(Text with EEA relevance)</p>
</longTitle>
</preface>
%% Cell type:markdown id: tags:
## save dataset
%% Cell type:code id: tags:
``` python
df
df_sorted
```
%% Output
text \
celex_id
32016R2088 Commission Implementing Regulation (EU) 2016/....
32017R1595 Commission Implementing Regulation (EU) 2017/....
32020R0180 COMMISSION IMPLEMENTING REGULATION (EU) 2020/....
32018R1832 Commission Regulation (EU) 2018/...\n\nof 5 No...
32018R0292 Commission Implementing Regulation (EU) 2018/....
32019R0168 COMMISSION IMPLEMENTING REGULATION (EU) 2019/....
32020R2007 COMMISSION IMPLEMENTING REGULATION (EU) 2020/....
32018R0917 Commission Implementing Regulation (EU) 2018/....
32016R0861 Commission Delegated Regulation (EU) 2016/...\...
32017R0394 COMMISSION IMPLEMENTING REGULATION (EU) 2017/\...
... ...
32016R0876 COMMISSION IMPLEMENTING REGULATION (EU) 2016/\...
32020R1287 COMMISSION IMPLEMENTING REGULATION (EU) 2020/....
32017R2091 COMMISSION IMPLEMENTING REGULATION (EU) 2017/....
32017R1598 Commission Implementing Regulation (EU) 2017/....
32020R1293 COMMISSION IMPLEMENTING REGULATION (EU) 2020/....
32019R2199 ![](media/image1.emf)\n\nEXPLANATORY MEMORANDU...
32018R0830 ![](media/image1.emf)\n\nEXPLANATORY MEMORANDU...
32017R2016 ![](media/image1.emf)\n\nEXPLANATORY MEMORANDU...
32018R0295 ![](media/image1.emf)\n\nEXPLANATORY MEMORANDU...
32017L0738 ![](media/image1.emf)\n\nLANGUAGES concerned: ...
xml similarity \
celex_id
32016R2088 <preface>\n <longTitle>\n <p>\n ... 0.979452
32017R1595 <preface>\n <longTitle>\n <p>\n ... 0.985714
32020R0180 <preface>\n <longTitle>\n <p>\n ... 0.973621
32018R1832 <preface>\n <longTitle>\n <p>\n ... 0.992925
32018R0292 <preface>\n <longTitle>\n <p>\n ... 0.991482
32019R0168 <preface>\n <longTitle>\n <p>\n ... 0.997125
32020R2007 <preface>\n <longTitle>\n <p>\n ... 0.996767
32018R0917 <preface>\n <longTitle>\n <p>\n ... 0.996283
32016R0861 <preface>\n <longTitle>\n <p>\n ... 0.995925
32017R0394 <preface>\n <longTitle>\n <p>\n ... 0.995851
... ... ...
32016R0876 <preface>\n <longTitle>\n <p>\n ... 0.845471
32020R1287 <preface>\n <longTitle>\n <p>\n ... 0.982558
32017R2091 <preface>\n <longTitle>\n <p>\n ... 0.990769
32017R1598 <preface>\n <longTitle>\n <p>\n ... 0.989011
32020R1293 <preface>\n <longTitle>\n <p>\n ... 0.983607
32019R2199 <preface>\n <longTitle>\n <p>\n ... 0.099068
32018R0830 <preface>\n <longTitle>\n <p>\n ... 0.029810
32017R2016 <preface>\n <longTitle>\n <p>\n ... 0.011055
32018R0295 <preface>\n <longTitle>\n <p>\n ... 0.000706
32017L0738 <preface>\n <longTitle>\n <p>Counc... 0.000595
doc_number
celex_id
32016R2088 2016/2088
32017R1595 2017/1595
32020R0180 2020/180
32018R1832 2018/1832
32018R0292 2018/292
32019R0168 2019/168
32020R2007 2020/2007
32018R0917 2018/917
32016R0861 2016/861
32017R0394 2017/394
... ...
32016R0876 2016/876
32020R1287 2020/1287
32017R2091 2017/2091
32017R1598 2017/1598
32020R1293 2020/1293
32019R2199 2019/2199
32018R0830 2018/830
32017R2016 2017/2016
32018R0295 2018/295
32017L0738 2009/48
[2905 rows x 4 columns]
%% Cell type:code id: tags:
``` python
# Persist the ranked preface dataset (text, XML, doc number, similarity) as JSON.
import os
# Create directory if it does not exist
# NOTE(review): `output_dir` is assigned twice in a row, so only the second
# value ('../data/prefaces') takes effect — confirm which path is intended.
output_dir = '../data/prefaces/dataset'
output_dir = '../data/prefaces'
if not os.path.exists(output_dir):
os.makedirs(output_dir)
# Add document id with text and xml content: reset_index() turns the celex_id
# index back into a regular column so it can be selected and exported.
df_with_content = df_sorted.reset_index()[['celex_id', 'text', 'xml', 'doc_number', 'similarity']]
# orient='records' writes a JSON array with one object per document.
# NOTE(review): the same frame is written to two files (prefaces.json and
# dataset.json) — confirm whether both outputs are wanted.
df_with_content.to_json(os.path.join(output_dir, 'prefaces.json'), orient='records', indent=2)
df_with_content.to_json(os.path.join(output_dir, 'dataset.json'), orient='records', indent=2)
```
%% Cell type:code id: tags:
``` python
# TODO: analyse the dataset (document types, ...)
```
......
%% Cell type:code id: tags:
``` python
!pip install -q dspy-ai python-dotenv rouge-score
!pip install -q dspy-ai==2.4 python-dotenv rouge-score
```
%% Output
1342.71s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: pip install --upgrade pip
%% Cell type:code id: tags:
``` python
from importlib.metadata import version
print(f"dspy-ai version: {version('dspy-ai')}")
```
%% Output
dspy-ai version: 2.4.0
%% Cell type:code id: tags:
``` python
from dotenv import load_dotenv
import os
import json
import re
import sys
```
%% Cell type:code id: tags:
%% Cell type:markdown id: tags:
``` python
## Step 0: Setup
%% Cell type:code id: tags:
``` python
# Get the current working directory
current_dir = os.getcwd()
# Append the parent directory to sys.path
parent_dir = os.path.dirname(current_dir)
sys.path.append(parent_dir)
# Now you can import from functions
from functions import *
```
%% Cell type:code id: tags:
``` python
os.environ['DSP_CACHEBOOL'] = 'false'
os.environ["DSP_NOTEBOOK_CACHEDIR"] = os.path.join(os.getcwd(), 'cache')
```
%% Cell type:code id: tags:
``` python
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')
```
%% Cell type:markdown id: tags:
## Step 1: Setup
%% Cell type:markdown id: tags:
### Check environment variables
%% Cell type:code id: tags:
``` python
check_env_vars()
```
%% Output
OPENAI_API_KEY is set. Value: sk-p****************************************************
AWS_ACCESS_KEY_ID is set. Value: AKIA****************
AWS_SECRET_ACCESS_KEY is set. Value: WWYs************************************
AWS_SESSION_TOKEN is not set.
%% Cell type:markdown id: tags:
### Initialize LLMs
%% Cell type:code id: tags:
``` python
import dspy
gpt4o = dspy.OpenAI(api_key=api_key, model='gpt-4o-mini', max_tokens=10000)
dspy.settings.configure(lm=gpt4o)
aws_provider_ue1 = dspy.Bedrock(region_name="eu-west-1")
llms = {
"llama3.2.1b": dspy.AWSMeta(
aws_provider=aws_provider_ue1,
model="eu.meta.llama3-2-1b-instruct-v1:0",
max_new_tokens=2047
),
"llama3.2.3b": dspy.AWSMeta(
aws_provider=aws_provider_ue1,
model="eu.meta.llama3-2-3b-instruct-v1:0",
# max_tokens=12000, # TODO: limited in the api, however we can increase the context size to 132k for such model
max_context_size = 10000,
# max_new_tokens = 5000,
),
# "gpt4o-mini" : dspy.LM('openai/gpt-4o-mini', max_tokens=10000, cache=False),
"gpt4o-mini-2" : dspy.OpenAI(api_key=api_key, model='gpt-4o-mini', max_tokens=10000)
}
```
%% Output
%% Cell type:markdown id: tags:
/Users/nasredine/dev/work/playground/.venv/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
from .autonotebook import tqdm as notebook_tqdm
## Step 1: basic experiment
%% Cell type:markdown id: tags:
### 0-shot conversion
%% Cell type:code id: tags:
``` python
class SimplePrefaceSignature(dspy.Signature):
    """Create an XML representation of a document preface section in the Akoma Ntoso (AKN) format."""

    # The docstring above and the ``desc`` strings below are runtime text:
    # DSPy renders them into the prompt sent to the LM, so edit them with care.
    # Input: the preface as raw text.
    text = dspy.InputField(desc="Raw text format of the document preface section")
    # Output: the AKN XML the LM is asked to produce for that preface.
    xml = dspy.OutputField(desc="Akoma Ntoso (AKN) XML representation of the input preface")
def zero_shot_conversion(lm):
    """Run one zero-shot preface-to-AKN conversion on a sample and print it.

    A ``dspy.Predict`` over ``SimplePrefaceSignature`` is called on a single
    hard-coded preface and the resulting prediction is printed.

    Args:
        lm: Not read by this function; kept so existing callers keep working.
            NOTE(review): presumably the LM is expected to be configured
            globally via ``dspy.settings`` before this is called - confirm.
    """
    # This must be a plain ``str``. The previous version ended the assignment
    # with a trailing comma, which turned ``text`` into a 1-tuple, and wrote
    # slashes as ``\/`` (JSON escaping, which is not a valid Python escape).
    text = (
        "Commission Implementing Regulation (EU) 2018/...\n\n"
        "of 27 June 2018\n\n"
        "amending Implementing Regulation (EU) No 540/2011 as regards the\n"
        "extension of the approval periods of the active substances\n"
        "alpha-cypermethrin, beflubutamid, benalaxyl, benthiavalicarb,\n"
        "bifenazate, boscalid, bromoxynil, captan, carvone, chlorpropham,\n"
        "cyazofamid, desmedipham, dimethoate, dimethomorph, diquat, ethephon,\n"
        "ethoprophos, etoxazole, famoxadone, fenamidone, fenamiphos,\n"
        "flumioxazine, fluoxastrobin, folpet, foramsulfuron, formetanate,\n"
        "*Gliocladium catenulatum* strain: J1446, isoxaflutole, metalaxyl-m,\n"
        "methiocarb, methoxyfenozide, metribuzin, milbemectin, oxasulfuron,\n"
        "*Paecilomyces lilacinus* strain 251, phenmedipham, phosmet,\n"
        "pirimiphos-methyl, propamocarb, prothioconazole, pymetrozine and\n"
        "s-metolachlor\n\n"
        "**(Text with EEA relevance)"
    )
    transform = dspy.Predict(SimplePrefaceSignature)
    xml = transform(text=text)
    # TODO: use a validation function after refactoring the code
    print(xml)
# Try the zero-shot conversion once with every configured model.
for llm, lm in llms.items():
    # Make this model the globally configured LM before running the sample.
    dspy.settings.configure(lm=lm)
    print("-------------------------")
    print("Testing on", llm)
    zero_shot_conversion(lm)
```
%% Cell type:code id: tags:
``` python
dspy.configure(experimental=True)
```
%% Cell type:markdown id: tags:
### Choose the LLM
%% Cell type:code id: tags:
``` python
import dspy
# selected_llm = llms["llama3.2.3b"]
selected_llm = llms["llama3.2.3b"]
dspy.settings.configure(lm=selected_llm)
```
%% Cell type:markdown id: tags:
## Step 2: Define Signatures
%% Cell type:code id: tags:
``` python
class GenerateAKN(dspy.Signature):
class SimplePrefaceSignature(dspy.Signature):
    """Create an XML representation of a document preface section in the Akoma Ntoso (AKN) format. Update the docNumber tag (if provided) with the provided doc_number."""

    # The docstring above and the ``desc`` strings below are runtime text:
    # DSPy renders them into the prompt sent to the LM, so edit them with care.
    # Input: the preface as raw text.
    text = dspy.InputField(desc="Raw text format of the document preface section")
    # Input: the document number the LM should put into the XML.
    doc_number = dspy.InputField(desc="Document number to be included in the XML representation")
    # Output: the AKN XML the LM is asked to produce for that preface.
    xml = dspy.OutputField(desc="Akoma Ntoso (AKN) XML representation of the input preface")
```
%% Cell type:markdown id: tags:
## Step 3: Building the Transformation Pipeline
%% Cell type:code id: tags:
``` python
class DocumentToXMLPipeline(dspy.Module):
def __init__(self):
    """Set up the pipeline with a chain-of-thought predictor.

    The predictor is built over ``SimplePrefaceSignature``. An earlier
    assignment that used the old ``GenerateAKN`` name was overwritten on the
    very next line and has been removed.
    """
    super().__init__()
    self.transform = dspy.ChainOfThought(SimplePrefaceSignature)
def extract_xml(self, content):
    """Return the last ``<preface>...</preface>`` fragment found in *content*.

    Args:
        content: Text that may contain one or more preface elements mixed
            with other text. ``None`` or an empty string means "no XML".

    Returns:
        The last matching fragment, tags included, or ``""`` when there is
        no match or no content.
    """
    # A missing completion would otherwise make ``re.findall`` raise TypeError.
    if not content:
        return ""
    # DOTALL lets "." cross newlines; the non-greedy ".*?" stops each match at
    # the first closing tag so several fragments stay separate.
    fragments = re.findall(r'<preface>.*?</preface>', content, re.DOTALL)
    return fragments[-1] if fragments else ""
def forward(self, text, doc_number=None):
    """Convert a preface text into an AKN ``<preface>`` fragment.

    Args:
        text: Raw preface text. Empty or ``None`` short-circuits without
            calling the LM.
        doc_number: Optional document number passed through to the predictor.

    Returns:
        A dict ``{"xml": <fragment>}``; the fragment is ``""`` when there is
        no input text or no ``<preface>`` element in the LM output.
    """
    # Always return the same shape: consumers index the result with ['xml'],
    # so the old bare "" return for empty input would have raised there.
    if not text:
        return {"xml": ""}
    # Ask the LM for the preface XML.
    prediction = self.transform(text=text, doc_number=doc_number)
    # Keep only the <preface> element from the raw completion.
    return {"xml": self.extract_xml(prediction.xml)}
```
%% Cell type:markdown id: tags:
## Step 4: Executing the Pipeline (0-shot conversion without optimization)
%% Cell type:code id: tags:
``` python
xml_pipeline = DocumentToXMLPipeline()
```
%% Cell type:code id: tags:
``` python
def process_documents(dataset):
    """Run the module-level ``xml_pipeline`` on the ``'text'`` of each item.

    Args:
        dataset: Iterable of mappings that each carry a ``'text'`` entry.

    Returns:
        A list with one pipeline result per item, in input order.
    """
    # Collect every text first, then convert them one by one.
    texts = [record['text'] for record in dataset]
    return [xml_pipeline(preface_text) for preface_text in texts]
```
%% Cell type:markdown id: tags:
## Step 5: Optimizing the Pipeline
%% Cell type:markdown id: tags:
### Dataset preparation
%% Cell type:code id: tags:
``` python
def load_data_from_json(file_path):
def load_dataset_from_json(file_path):
    """Load and return the parsed content of a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` yields for the file (a list of records for the
        dataset files used in this notebook).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Read explicitly as UTF-8 so non-ASCII text does not depend on the
    # platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)
def prepare_example(text, xml, doc_number):
    """Build a ``dspy.Example`` from one dataset record.

    Args:
        text: Raw preface text; surrounding whitespace is stripped.
        xml: Reference XML; surrounding whitespace is stripped.
        doc_number: Document number, or ``None``. When given it is stripped,
            stored on the example and declared as an input next to ``text``.

    Returns:
        The example with its input keys declared via ``with_inputs``.
    """
    fields = {'text': text.strip(), 'xml': xml.strip()}
    if doc_number is None:
        # Without a document number only "text" is an input; it is passed
        # twice, exactly as before.
        input_keys = ("text", "text")
    else:
        fields['doc_number'] = doc_number.strip()
        input_keys = ("text", "doc_number")
    return dspy.Example(fields).with_inputs(*input_keys)
def create_dataset(data):
    """Turn raw records into examples, skipping records with no document number.

    Args:
        data: Iterable of dict records with ``'text'``, ``'xml'`` and
            (optionally) ``'doc_number'`` entries.

    Returns:
        A list with one ``prepare_example`` result per kept record, in order.
    """
    examples = []
    for record in data:
        # Records with a missing or None doc_number are left out.
        if record.get('doc_number') is None:
            continue
        examples.append(prepare_example(record['text'], record['xml'], record['doc_number']))
    return examples
```
%% Cell type:code id: tags:
``` python
file_path = '../data/prefaces/dataset/prefaces.json'
dataset_file_path = '../data/prefaces/dataset.json'
# Load and prepare the dataset
data = load_data_from_json(file_path)
trainset = create_dataset(data)
dataset = load_dataset_from_json(dataset_file_path)
trainset = create_dataset(dataset)
```
%% Cell type:code id: tags:
``` python
len(trainset)
```
%% Output
2898
%% Cell type:code id: tags:
``` python
from rouge_score import rouge_scorer
```
%% Cell type:code id: tags:
``` python
import xml.etree.ElementTree as ET
import lxml.etree as etree
from rouge_score import rouge_scorer
def insert_xml_into_akn_empty(xml_content, template_path='../data/akn_files/akn_empty.xml'):
    """Embed a generated XML fragment into the empty AKN template document.

    The template is parsed on every call, *xml_content* is parsed as a single
    element, and that element is inserted as the next sibling of the
    template's ``<meta>`` element.

    Args:
        xml_content: The XML fragment to embed, as a string.
        template_path: Path of the empty AKN template. Defaults to the
            location this notebook has always used.

    Returns:
        ``(document, None)`` with the serialised document on success, or
        ``(None, message)`` when the template has no ``<meta>`` element or
        any step raises.
    """
    meta_path = './/{http://docs.oasis-open.org/legaldocml/ns/akn/3.0}meta'
    try:
        # Load the empty AKN file.
        root = etree.parse(template_path).getroot()
        # Parse the generated XML content.
        generated_xml = etree.fromstring(xml_content)
        meta_element = root.find(meta_path)
        if meta_element is None:
            message = "Meta element not found in the empty AKN file."
            print(message)
            return None, message
        # Insert the generated XML right after the meta element.
        meta_element.addnext(generated_xml)
        return etree.tostring(root, encoding='unicode'), None
    except Exception as e:
        # Deliberately broad: any failure is reported to the caller as an
        # error message instead of being raised.
        return None, str(e)
import logging
# Configure logging
logging.basicConfig(filename=f'validation_log_{selected_llm}.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def validate_xml_rouge_score(reference, prediction, trace=None):
    """Metric: schema-valid AKN whose ROUGE-L F1 against the reference is high.

    The predicted fragment is embedded in the empty AKN template and
    validated against the AKN XSD. If that succeeds, ROUGE-1 and ROUGE-L are
    computed between the reference and predicted XML strings.

    Args:
        reference: Example providing ``'xml'`` (and ``'text'`` for reports).
        prediction: Pipeline output providing ``'xml'``.
        trace: Not used here.

    Returns:
        ``False`` when embedding or schema validation fails; otherwise
        whether the ROUGE-L F1 is at least 0.96. Scores of 0.9 or below also
        get a detailed report on stdout and in the log.
    """
    # Step 1: embed the fragment into the empty AKN document.
    akn_xml, error = insert_xml_into_akn_empty(prediction['xml'])
    if akn_xml is None:
        print(error)
        logging.error(error)
        return False

    # Step 2: validate the full document against the AKN schema.
    schema = etree.XMLSchema(file='../data/akn_files/schema/akomantoso30.xsd')
    is_valid, error_log = validate_akn(akn_xml, schema)
    if not is_valid:
        invalid_message = "XML is not valid according to AKN schema"
        print(invalid_message)
        print(error_log)
        logging.error(invalid_message)
        logging.error(error_log)
        return False

    # Step 3: score textual overlap between reference and prediction.
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
    scores = scorer.score(reference['xml'], prediction['xml'])
    rougeL_f1 = scores['rougeL'].fmeasure
    print("rougeL_f1", scores)
    logging.info("rougeL_f1 %s", scores)

    if rougeL_f1 <= 0.9:
        # Detailed report for clearly poor predictions.
        separator = "-" * 100
        headline = f"Low ROUGE-L F1 Score Detected: {rougeL_f1}"
        print(headline)
        print(separator)
        print("Reference Text:", reference['text'])
        print("Reference XML:", reference['xml'])
        print("Predicted XML:", prediction['xml'])
        print(separator)
        logging.warning(headline)
        logging.warning(separator)
        logging.warning("Reference Text: %s", reference['text'])
        logging.warning("Reference XML: %s", reference['xml'])
        logging.warning("Predicted XML: %s", prediction['xml'])
        logging.warning(separator)

    # Pass threshold for ROUGE-L; can be adjusted as needed.
    return rougeL_f1 >= 0.96
```
%% Cell type:code id: tags:
``` python
from dspy.teleprompt import BootstrapFewShot
```
%% Output
/Users/nasredine/dev/work/playground/dspy_programs/preface/cache/compiler
%% Cell type:code id: tags:
``` python
teleprompter = BootstrapFewShot(metric=validate_xml_rouge_score)
# teleprompter = BootstrapFewShot(metric=validate_xml_rouge_score)
teleprompter = BootstrapFewShot(metric=validate_xml_rouge_score, max_bootstrapped_demos=2, max_labeled_demos=2)
compiled_pipeline = teleprompter.compile(DocumentToXMLPipeline(), trainset=trainset[:50])
```
%% Output
2%|▏ | 1/50 [00:13<11:17, 13.83s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
4%|▍ | 2/50 [00:22<08:26, 10.54s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
0%| | 0/50 [00:00<?, ?it/s]
6%|▌ | 3/50 [00:37<09:50, 12.56s/it]
12%|█▏ | 6/50 [00:19<02:22, 3.23s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
8%|▊ | 4/50 [00:45<08:42, 11.36s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
Bootstrapped 4 full traces after 5 examples in round 0.
Bootstrapped 2 full traces after 7 examples in round 0.
%% Cell type:code id: tags:
``` python
from dspy.evaluate import Evaluate
```
%% Cell type:code id: tags:
``` python
# Create a dev set from the remaining data not used in training
devset = create_dataset(data[50:80]) # Using the next 50 examples for the dev set
devset = create_dataset(dataset[50:100]) # Using the next 50 examples for the dev set
# Set up the evaluator
evaluate = Evaluate(devset=devset, metric=validate_xml_rouge_score, num_threads=4, display_progress=True, display_table=0)
evaluate = Evaluate(devset=devset, metric=validate_xml_rouge_score, num_threads=4, display_progress=True)
# Evaluate the compiled pipeline
results = evaluate(compiled_pipeline)
print(results)
```
%% Output
Average Metric: 3 / 3 (100.0): 10%|█ | 3/29 [00:05<00:39, 1.50s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
Average Metric: 4 / 4 (100.0): 14%|█▍ | 4/29 [00:06<00:31, 1.25s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
Average Metric: 13 / 49 (26.5): 100%|██████████| 49/49 [00:27<00:00, 1.76it/s]
Average Metric: 5 / 5 (100.0): 17%|█▋ | 5/29 [00:10<00:53, 2.21s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
Average Metric: 6 / 6 (100.0): 21%|██ | 6/29 [00:14<01:01, 2.69s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
Average Metric: 7 / 7 (100.0): 24%|██▍ | 7/29 [00:14<00:43, 1.99s/it]
rougeL_f1 {'rouge1': Score(precision=0.989010989010989, recall=0.9782608695652174, fmeasure=0.9836065573770493), 'rougeL': Score(precision=0.989010989010989, recall=0.9782608695652174, fmeasure=0.9836065573770493)}
Average Metric: 7 / 8 (87.5): 28%|██▊ | 8/29 [00:15<00:32, 1.55s/it]
rougeL_f1 {'rouge1': Score(precision=0.9795918367346939, recall=0.9795918367346939, fmeasure=0.9795918367346939), 'rougeL': Score(precision=0.9081632653061225, recall=0.9081632653061225, fmeasure=0.9081632653061225)}
Average Metric: 8 / 9 (88.9): 31%|███ | 9/29 [00:15<00:24, 1.23s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
Average Metric: 8 / 10 (80.0): 34%|███▍ | 10/29 [00:19<00:39, 2.10s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=0.8867924528301887, fmeasure=0.9400000000000001), 'rougeL': Score(precision=1.0, recall=0.8867924528301887, fmeasure=0.9400000000000001)}
Average Metric: 8 / 11 (72.7): 38%|███▊ | 11/29 [00:20<00:28, 1.59s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=0.8867924528301887, fmeasure=0.9400000000000001), 'rougeL': Score(precision=1.0, recall=0.8867924528301887, fmeasure=0.9400000000000001)}
Average Metric: 8 / 12 (66.7): 41%|████▏ | 12/29 [00:21<00:22, 1.30s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=0.8867924528301887, fmeasure=0.9400000000000001), 'rougeL': Score(precision=1.0, recall=0.8867924528301887, fmeasure=0.9400000000000001)}
Average Metric: 9 / 13 (69.2): 45%|████▍ | 13/29 [00:21<00:16, 1.05s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
Average Metric: 10 / 14 (71.4): 48%|████▊ | 14/29 [00:24<00:26, 1.76s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
Average Metric: 11 / 15 (73.3): 52%|█████▏ | 15/29 [00:25<00:20, 1.46s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
Average Metric: 12 / 16 (75.0): 55%|█████▌ | 16/29 [00:26<00:14, 1.14s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
Average Metric: 13 / 17 (76.5): 59%|█████▊ | 17/29 [00:26<00:12, 1.07s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
Average Metric: 14 / 18 (77.8): 62%|██████▏ | 18/29 [00:29<00:17, 1.60s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
Average Metric: 16 / 20 (80.0): 66%|██████▌ | 19/29 [00:30<00:13, 1.35s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
Average Metric: 18 / 22 (81.8): 76%|███████▌ | 22/29 [00:34<00:09, 1.29s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
Average Metric: 20 / 24 (83.3): 79%|███████▉ | 23/29 [00:36<00:08, 1.36s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
Average Metric: 21 / 25 (84.0): 86%|████████▌ | 25/29 [00:39<00:05, 1.41s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
Average Metric: 22 / 26 (84.6): 90%|████████▉ | 26/29 [00:40<00:03, 1.26s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
Average Metric: 23 / 27 (85.2): 93%|█████████▎| 27/29 [00:40<00:02, 1.17s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
Average Metric: 24 / 28 (85.7): 97%|█████████▋| 28/29 [00:41<00:00, 1.05it/s]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
Average Metric: 25 / 29 (86.2): 100%|██████████| 29/29 [00:43<00:00, 1.51s/it]
rougeL_f1 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
86.21
26.53
%% Cell type:code id: tags:
``` python
gpt4o.inspect_history(n=1)
selected_llm.inspect_history(n=1)
```
%% Output
Create an XML representation of a document preface section in the Akoma Ntoso (AKN) format. Update the docNumber tag (if provided) with the provided doc_number.
<|begin_of_text|><|start_header_id|>user<|end_header_id|>Create an XML representation of a document preface section in the Akoma Ntoso (AKN) format. Update the docNumber tag (if provided) with the provided doc_number.
---
Follow the following format.
Text: Raw text format of the document prefece section
Doc Number: Document number to be included in the XML representation
Reasoning: Let's think step by step in order to ${produce the xml}. We ...
Xml: Akoma Ntoso (AKN) XML representation of the input preface
---
Text: COMMISSION IMPLEMENTING REGULATION (EU) 2019/... of 31 January 2019 amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances abamectin, *Bacillus subtilis* (Cohn 1872) Strain QST 713, *Bacillus thuringiensis* subsp. Aizawai, *Bacillus thuringiensis* subsp. israeliensis, *Bacillus thuringiensis* subsp. kurstaki, *Beauveria bassiana*, benfluralin, clodinafop, clopyralid, *Cydia pomonella Granulovirus* (CpGV), cyprodinil, dichlorprop-P, epoxiconazole, fenpyroximate, fluazinam, flutolanil, fosetyl, *Lecanicillium muscarium*, mepanipyrim, mepiquat, *Metarhizium anisopliae* var. Anisopliae, metconazole, metrafenone, *Phlebiopsis gigantea*, pirimicarb, *Pseudomonas chlororaphis* strain: MA 342, pyrimethanil*, Pythium oligandrum*, rimsulfuron, spinosad, *Streptomyces* K61, thiacloprid, tolclofos-methyl, *Trichoderma asperellum*, *Trichoderma atroviride*, *Trichoderma gamsii*, *Trichoderma harzianum*, triclopyr, trinexapac, triticonazole, *Verticillium albo-atrum* and ziram (Text with EEA relevance)
Text:
COMMISSION IMPLEMENTING REGULATION (EU) 2017/\...
Doc Number: 2019/168
of 11 November 2016
Reasoning: Let's think step by step in order to produce the XML. We will start by identifying the key components of the preface section. The preface will include a long title that consists of several paragraphs. The first paragraph will contain the title of the regulation, including the type of document and the document number. The second paragraph will include the date of the document. The third paragraph will contain the main content of the regulation. If there is any additional relevant text, it will be included in a separate paragraph. Now, let's construct the XML representation based on the provided text and document number.
laying down implementing technical standards with regard to standard
forms, templates and procedures for authorisation, review and evaluation
of central securities depositories, for the cooperation between
authorities of the home Member State and the host Member State, for the
consultation of authorities involved in the authorisation to provide
banking-type ancillary services, for access involving central securities
depositories, and with regard to the format of the records to be
maintained by central securities depositories in accordance with
Regulation (EU) No 909/2014 of the European Parliament and of the
Council
Xml: ```xml <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2019/168</docNumber></span> </p> <p>of<docDate date="2019-01-31">31 January 2019</docDate></p> <p>amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances abamectin, <span class="ITALIC">Bacillus subtilis</span> (Cohn 1872) Strain QST 713, <span class="ITALIC">Bacillus thuringiensis</span> subsp. Aizawai, <span class="ITALIC">Bacillus thuringiensis</span> subsp. israeliensis, <span class="ITALIC">Bacillus thuringiensis</span> subsp. kurstaki, <span class="ITALIC">Beauveria bassiana</span>, benfluralin, clodinafop, clopyralid, <span class="ITALIC">Cydia pomonella Granulovirus</span> (CpGV), cyprodinil, dichlorprop-P, epoxiconazole, fenpyroximate, fluazinam, flutolanil, fosetyl, <span class="ITALIC">Lecanicillium muscarium</span>, mepanipyrim, mepiquat, <span class="ITALIC">Metarhizium anisopliae</span> var. Anisopliae, metconazole, metrafenone, <span class="ITALIC">Phlebiopsis gigantea</span>, pirimicarb, <span class="ITALIC">Pseudomonas chlororaphis</span> strain: MA 342, pyrimethanil, <span class="ITALIC">Pythium oligandrum</span>, rimsulfuron, spinosad, <span class="ITALIC">Streptomyces</span> K61, thiacloprid, tolclofos-methyl, <span class="ITALIC">Trichoderma asperellum</span>, <span class="ITALIC">Trichoderma atroviride</span>, <span class="ITALIC">Trichoderma gamsii</span>, <span class="ITALIC">Trichoderma harzianum</span>, triclopyr, trinexapac, triticonazole, <span class="ITALIC">Verticillium albo-atrum</span> and ziram</p> <p>(Text with EEA relevance)</p> </longTitle> </preface> ```
**(Text with EEA relevance)
---
Doc Number: 2017/394
Text: COMMISSION IMPLEMENTING REGULATION (EU) 2020/... of 8 December 2020 amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances 1-decanol, 1,4-dimethylnaphthalene, 6-benzyladenine, acequinocyl, *Adoxophyes orana granulovirus*, aluminium sulfate, amisulbrom, *Aureobasidium pullulans* (strains DSM 14940 and DSM 14941), azadirachtin, *Bacillus pumilus* QST 2808, benalaxyl-M, bixafen, bupirimate, *Candida oleophila* strain O, chlorantraniliprole, disodium phosphonate, dithianon, dodine, emamectin, flubendiamide, fluometuron, fluxapyroxad, flutriafol, hexythiazox, imazamox, ipconazole, isoxaben, L-ascorbic acid, lime sulphur, orange oil, *Paecilomyces fumosoroseus* strain FE 9901, pendimethalin, penflufen, penthiopyrad, potassium phosphonates, prosulfuron, *Pseudomonas sp*. strain DSMZ 13134, pyridalyl, pyriofenone, pyroxsulam, quinmerac, S-abscisic acid, sedaxane, sintofen, sodium silver thiosulfate, spinetoram, spirotetramat, *Streptomyces lydicus* strain WYEC 108, tau-fluvalinate, tebufenozide, tembotrione, thiencarbazone, valifenalate, zinc phosphide
Reasoning: Let's think step by step in order to The provided text is a document preface section in the Akoma Ntoso (AKN) format. The docNumber tag should be updated with the provided doc_number. The longTitle element contains the title of the document, which includes the docType and docNumber. The docDate element is used to specify the date of the document. The text within the longTitle element provides a brief description of the document.
Doc Number: 2020/2007
Xml:
<preface>
Reasoning: Let's think step by step in order to produce the XML. We will start by identifying the key components of the preface section, which includes the title, document type, document number, and date. We will format these components according to the Akoma Ntoso (AKN) XML structure. 1. **Title**: The title begins with "Commission Implementing Regulation" followed by the document type and number. 2. **Date**: The date is provided in the format "day month year". 3. **Content**: The main content of the preface follows the title and date. Now, let's construct the XML representation based on the provided text and document number.
<longTitle>
Xml: ```xml <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2020/2007</docNumber></span> </p> <p>of<docDate date="2020-12-08">8 December 2020</docDate></p> <p>amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances 1-decanol, 1,4-dimethylnaphthalene, 6-benzyladenine, acequinocyl, <span class="ITALIC">Adoxophyes orana granulovirus</span>, aluminium sulfate, amisulbrom, <span class="ITALIC">Aureobasidium pullulans</span> (strains DSM 14940 and DSM 14941), azadirachtin, <span class="ITALIC">Bacillus pumilus</span> QST 2808, benalaxyl-M, bixafen, bupirimate, <span class="ITALIC">Candida oleophila</span> strain O, chlorantraniliprole, disodium phosphonate, dithianon, dodine, emamectin, flubendiamide, fluometuron, fluxapyroxad, flutriafol, hexythiazox, imazamox, ipconazole, isoxaben, L-ascorbic acid, lime sulphur, orange oil, <span class="ITALIC">Paecilomyces fumosoroseus</span> strain FE 9901, pendimethalin, penflufen, penthiopyrad, potassium phosphonates, prosulfuron, <span class="ITALIC">Pseudomonas sp</span>. strain DSMZ 13134, pyridalyl, pyriofenone, pyroxsulam, quinmerac, S-abscisic acid, sedaxane, sintofen, sodium silver thiosulfate, spinetoram, spirotetramat, <span class="ITALIC">Streptomyces lydicus</span> strain WYEC 108, tau-fluvalinate, tebufenozide, tembotrione, thiencarbazone, valifenalate, zinc phosphide</p> </longTitle> </preface> ```
<p>
---
<span>COMMISSION IMPLEMENTING REGULATION (EU) 2017/394</span>
Text: Commission Implementing Regulation (EU) 2018/... of 27 June 2018 amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances alpha-cypermethrin, beflubutamid, benalaxyl, benthiavalicarb, bifenazate, boscalid, bromoxynil, captan, carvone, chlorpropham, cyazofamid, desmedipham, dimethoate, dimethomorph, diquat, ethephon, ethoprophos, etoxazole, famoxadone, fenamidone, fenamiphos, flumioxazine, fluoxastrobin, folpet, foramsulfuron, formetanate, *Gliocladium catenulatum* strain: J1446, isoxaflutole, metalaxyl-m, methiocarb, methoxyfenozide, metribuzin, milbemectin, oxasulfuron, *Paecilomyces lilacinus* strain 251, phenmedipham, phosmet, pirimiphos-methyl, propamocarb, prothioconazole, pymetrozine and s-metolachlor **(Text with EEA relevance)
</p>
Doc Number: 2018/917
<p>of<docDate date="2017-11-11">11 November 2016</docDate></p>
Reasoning: Let's think step by step in order to produce the XML. We will start by identifying the key components of the preface section, which includes the title, document type, document number, and date. We will then format these components according to the Akoma Ntoso (AKN) XML structure. 1. **Identify the Title**: The title is "Commission Implementing Regulation (EU) 2018/... of 27 June 2018 amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances alpha-cypermethrin, beflubutamid, benalaxyl, benthiavalicarb, bifenazate, boscalid, bromoxynil, captan, carvone, chlorpropham, cyazofamid, desmedipham, dimethoate, dimethomorph, diquat, ethephon, ethoprophos, etoxazole, famoxadone, fenamidone, fenamiphos, flumioxazine, fluoxastrobin, folpet, foramsulfuron, formetanate, *Gliocladium catenulatum* strain: J1446, isoxaflutole, metalaxyl-m, methiocarb, methoxyfenozide, metribuzin, milbemectin, oxasulfuron, *Paecilomyces lilacinus* strain 251, phenmedipham, phosmet, pirimiphos-methyl, propamocarb, prothioconazole, pymetrozine and s-metolachlor (Text with EEA relevance)". 2. **Document Type**: The document type is "Regulation". 3. **Document Number**: The document number is "2018/917". 4. **Date**: The date is "27 June 2018". 5. **Construct the XML**: We will format these components into the AKN XML structure. Now, let's create the XML representation.
<p>laying down implementing technical standards with regard to standard forms, templates and procedures for authorisation, review and evaluation of central securities depositories, for the cooperation between authorities of the home Member State and the host Member State, for the consultation of authorities involved in the authorisation to provide banking-type ancillary services, for access involving central securities depositories, and with regard to the format of the records to be maintained by central securities depositories in accordance with Regulation (EU) No 909/2014 of the European Parliament and of the Council</p>
Xml: ```xml <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2018/917</docNumber></span> </p> <p>of<docDate date="2018-06-27">27 June 2018</docDate></p> <p>amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances alpha-cypermethrin, beflubutamid, benalaxyl, benthiavalicarb, bifenazate, boscalid, bromoxynil, captan, carvone, chlorpropham, cyazofamid, desmedipham, dimethoate, dimethomorph, diquat, ethephon, ethoprophos, etoxazole, famoxadone, fenamidone, fenamiphos, flumioxazine, fluoxastrobin, folpet, foramsulfuron, formetanate, <span class="ITALIC">Gliocladium catenulatum</span> strain: J1446, isoxaflutole, metalaxyl-m, methiocarb, methoxyfenozide, metribuzin, milbemectin, oxasulfuron, <span class="ITALIC">Paecilomyces lilacinus</span> strain 251, phenmedipham, phosmet, pirimiphos-methyl, propamocarb, prothioconazole, pymetrozine and s-metolachlor</p> <p>(Text with EEA relevance)</p> </longTitle> </preface> ```
<p>(Text with EEA relevance)</p>
---
Text: Commission Delegated Regulation (EU) 2016/... of 18 February 2016 correcting Commission Delegated Regulation (EU) No 528/2014 supplementing Regulation (EU) No 575/2013 of the European Parliament and of the Council with regard to regulatory technical standards for non-delta risk of options in the standardised market risk approach and correcting Commission Delegated Regulation (EU) No 604/2014 supplementing Directive 2013/36/EU of the European Parliament and of the Council with regard to regulatory technical standards with respect to qualitative and appropriate quantitative criteria to identify categories of staff whose professional activities have a material impact on an institution's risk profile (Text with EEA relevance)
</longTitle>
Doc Number: 2016/861
</preface>
Reasoning: Let's think step by step in order to produce the XML. We will start by identifying the key components of the preface section, which includes the title, document type, document number, and date. We will then format these components according to the Akoma Ntoso (AKN) XML structure. 1. **Identify the Title**: The title is "Commission Delegated Regulation (EU) 2016/...". 2. **Document Type**: The document type is "Regulation". 3. **Document Number**: The document number is "2016/861". 4. **Document Date**: The date is "18 February 2016". 5. **Content**: The content of the preface includes the full description of the regulation. Now, we will structure this information into the AKN XML format.
---
Xml: ```xml <preface> <longTitle> <p> <span>Commission Delegated<docType>Regulation</docType>(EU) No<docNumber>2016/861</docNumber></span> </p> <p>of<docDate date="2016-02-18">18 February 2016</docDate></p> <p>correcting Commission Delegated Regulation (EU) No 528/2014 supplementing Regulation (EU) No 575/2013 of the European Parliament and of the Council with regard to regulatory technical standards for non-delta risk of options in the standardised market risk approach and correcting Commission Delegated Regulation (EU) No 604/2014 supplementing Directive 2013/36/EU of the European Parliament and of the Council with regard to regulatory technical standards with respect to qualitative and appropriate quantitative criteria to identify categories of staff whose professional activities have a material impact on an institution's risk profile</p> <p>(Text with EEA relevance)</p> </longTitle> </preface> ```
Text:
Commission Implementing Regulation (EU) 2017/...
---
of 17 May 2017
Text: Commission Implementing Regulation (EU) 2019/... of 17 May 2019 correcting the Greek language version of Implementing Regulation (EU) 2018/775 laying down rules for the application of Article 26(3) of Regulation (EU) No 1169/2011 of the European Parliament and of the Council on the provision of food information to consumers, as regards the rules for indicating the country of origin or place of provenance of the primary ingredient of a food (Text with EEA relevance)
Doc Number: 2019/802
Xml: <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2019/802</docNumber></span> </p> <p>of<docDate date="2019-05-17">17 May 2019</docDate></p> <p>correcting the Greek language version of Implementing Regulation (EU) 2018/775 laying down rules for the application of Article 26(3) of Regulation (EU) No 1169/2011 of the European Parliament and of the Council on the provision of food information to consumers, as regards the rules for indicating the country of origin or place of provenance of the primary ingredient of a food</p> <p>(Text with EEA relevance)</p> </longTitle> </preface>
amending Implementing Regulation (EU) No 540/2011 as regards the
extension of the approval periods of the active substances
alpha-cypermethrin, Ampelomyces quisqualis strain: aq 10, benalaxyl,
bentazone, bifenazate, bromoxynil, carfentrazone ethyl, chlorpropham,
cyazofamid, desmedipham, diquat, DPX KE 459 (flupyrsulfuron-methyl),
etoxazole, famoxadone, fenamidone, flumioxazine, foramsulfuron,
Gliocladium catenulatum strain: j1446, imazamox, imazosulfuron,
isoxaflutole, laminarin, metalaxyl-m, methoxyfenozide, milbemectin,
oxasulfuron, pendimethalin, phenmedipham, pymetrozine, s-metolachlor,
and trifloxystrobin
---
(Text with EEA relevance)
Text: Commission Delegated Regulation (EU) 2019/... of 12 March 2019 supplementing Regulation (EU) No 1308/2013 of the European Parliament and of the Council as regards wine-growing areas where the alcoholic strength may be increased, authorised oenological practices and restrictions applicable to the production and conservation of grapevine products, the minimum percentage of alcohol for by-products and their disposal, and publication of OIV files
Doc Number: 2019/934
Xml: <preface> <longTitle> <p> <span>Commission Delegated<docType>Regulation</docType>(EU) No<docNumber>2019/934</docNumber></span> </p> <p>of<docDate date="2019-03-12">12 March 2019</docDate></p> <p>supplementing Regulation (EU) No 1308/2013 of the European Parliament and of the Council as regards wine-growing areas where the alcoholic strength may be increased, authorised oenological practices and restrictions applicable to the production and conservation of grapevine products, the minimum percentage of alcohol for by-products and their disposal, and publication of OIV files</p> </longTitle> </preface>
Doc Number: 2017/841
---
Reasoning: Let's think step by step in order to produce the xml. We need to identify the elements and attributes that make up the preface section in the Akoma Ntoso (AKN) format. The preface section typically includes a long title with a document type and number, and a date. We will use the provided text to populate these elements and attributes.
Text: COMMISSION REGULATION (EU) 2019/... of 29 November 2019 amending Regulation (EC) No 1126/2008 adopting certain international accounting standards in accordance with Regulation (EC) No 1606/2002 of the European Parliament and of the Council as regards International Accounting Standards 1, 8, 34, 37 and 38, International Financial Reporting Standards 2, 3 and 6, Interpretations 12, 19, 20 and 22 of the International Financial Reporting Interpretations Committee and Interpretation 32 of the Standing Interpretations Committee **(Text with EEA relevance)
Doc Number: 2019/2075
Xml: <preface> <longTitle> <p> <span>Commission<docType>Regulation</docType>(EU) No<docNumber>2019/2075</docNumber></span> </p> <p>of<docDate date="2019-11-29">29 November 2019</docDate></p> <p>amending Regulation (EC) No 1126/2008 adopting certain international accounting standards in accordance with Regulation (EC) No 1606/2002 of the European Parliament and of the Council as regards International Accounting Standards 1, 8, 34, 37 and 38, International Financial Reporting Standards 2, 3 and 6, Interpretations 12, 19, 20 and 22 of the International Financial Reporting Interpretations Committee and Interpretation 32 of the Standing Interpretations Committee</p> <p>(Text with EEA relevance)</p> </longTitle> </preface>
Xml:
<preface>
---
<longTitle>
Text: Commission Implementing Regulation (EU) 2019/... of 13 February 2019 amending Implementing Regulation (EU) No 821/2014 laying down rules for the application of Regulation (EU) No 1303/2013 of the European Parliament and of the Council as regards detailed arrangements for the transfer and management of programme contributions, the reporting on financial instruments, technical characteristics of information and communication measures for operations and the system to record and store data
Doc Number: 2019/255
Xml: <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2019/255</docNumber></span> </p> <p>of<docDate date="2019-02-13">13 February 2019</docDate></p> <p>amending Implementing Regulation (EU) No 821/2014 laying down rules for the application of Regulation (EU) No 1303/2013 of the European Parliament and of the Council as regards detailed arrangements for the transfer and management of programme contributions, the reporting on financial instruments, technical characteristics of information and communication measures for operations and the system to record and store data</p> </longTitle> </preface>
<p>
---
<span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2017/841</docNumber></span>
Text: Commission Implementing Regulation (EU) 2018/... of 7 March 2018 concerning the authorisation of a preparation of 6-phytase, produced by *Aspergillus niger* (DSM 25770) as feed additive for chickens for fattening, chickens reared for laying, pigs for fattening, sows, minor porcine species for fattening or for reproduction, turkeys for fattening, turkeys reared for breeding, all other avian species (excluding laying birds) and weaned piglets (holder of the authorisation BASF SE) **(Text with EEA relevance)
Doc Number: 2018/338
Xml: <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2018/338</docNumber></span> </p> <p>of<docDate date="2018-03-07">7 March 2018</docDate></p> <p>concerning the authorisation of a preparation of 6-phytase, produced by<span class="ITALIC">Aspergillus niger</span>(DSM 25770) as feed additive for chickens for fattening, chickens reared for laying, pigs for fattening, sows, minor porcine species for fattening or for reproduction, turkeys for fattening, turkeys reared for breeding, all other avian species (excluding laying birds) and weaned piglets (holder of the authorisation BASF SE)</p> <p>(Text with EEA relevance)</p> </longTitle> </preface>
</p>
---
<p>of<docDate date="2017-05-17">17 May 2017</docDate></p>
Text: COMMISSION IMPLEMENTING REGULATION (EU) 2020/... of 5 February 2020 concerning the authorisation of a preparation of muramidase produced by *Trichoderma reesei* DSM 32338 as a feed additive for turkeys for fattening, turkeys reared for breeding, chickens reared for breeding and other poultry species reared for breeding (holder of authorisation DSM Nutritional Products Ltd. represented in the Union by DSM Nutritional Products Sp. Z o.o) **(Text with EEA relevance)
Doc Number: 2020/163
Xml: <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2020/163</docNumber></span> </p> <p>of<docDate date="2020-02-05">5 February 2020</docDate></p> <p>concerning the authorisation of a preparation of muramidase produced by<span class="ITALIC">Trichoderma reesei</span>DSM 32338 as a feed additive for turkeys for fattening, turkeys reared for breeding, chickens reared for breeding and other poultry species reared for breeding (holder of authorisation DSM Nutritional Products Ltd. represented in the Union by DSM Nutritional Products Sp. Z o.o)</p> <p>(Text with EEA relevance)</p> </longTitle> </preface>
<p>amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances alpha-cypermethrin, Ampelomyces quisqualis strain: aq 10, benalaxyl, bentazone, bifenazate, bromoxynil, carfentrazone ethyl, chlorpropham, cyazofamid, desmedipham, diquat, DPX KE 459 (flupyrsulfuron-methyl), etoxazole, famoxadone, fenamidone, flumioxazine, foramsulfuron, Gliocladium catenulatum strain: j1446, imazamox, imazosulfuron, isoxaflutole, laminarin, metalaxyl-m, methoxyfenozide, milbemectin, oxasulfuron, pendimethalin, phenmedipham, pymetrozine, s-metolachlor, and trifloxystrobin</p>
---
</longTitle>
Text: Commission Implementing Regulation (EU) 2018/... of 12 March 2018 amending Implementing Regulation (EU) No 1419/2013 concerning the recognition of producer organisations and inter-branch organisations, the extension of the rules of producer organisations and inter-branch organisations and the publication of trigger prices as provided for by Regulation (EU) No 1379/2013 of the European Parliament and of the Council on the common organisation of the markets in fishery and aquaculture products
Doc Number: 2018/390
Xml: <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2018/390</docNumber></span> </p> <p>of<docDate date="2018-03-12">12 March 2018</docDate></p> <p>amending Implementing Regulation (EU) No 1419/2013 concerning the recognition of producer organisations and inter-branch organisations, the extension of the rules of producer organisations and inter-branch organisations and the publication of trigger prices as provided for by Regulation (EU) No 1379/2013 of the European Parliament and of the Council on the common organisation of the markets in fishery and aquaculture products</p> </longTitle> </preface>
</preface>
---
Text: Commission Implementing Regulation (EU) 2019/... of 29 April 2019 approving the low-risk active substance ABE-IT 56 (components of lysate of *Saccharomyces cerevisiae* strain DDSF623), in accordance with Regulation (EC) No 1107/2009 of the European Parliament and of the Council concerning the placing of plant protection products on the market, and amending the Annex to Commission Implementing Regulation (EU) No 540/2011 (Text with EEA relevance)
Doc Number: 2019/676
Xml: <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2019/676</docNumber></span> </p> <p>of<docDate date="2019-04-29">29 April 2019</docDate></p> <p>approving the low-risk active substance ABE-IT 56 (components of lysate of<span class="ITALIC">Saccharomyces cerevisiae</span>strain DDSF623), in accordance with Regulation (EC) No 1107/2009 of the European Parliament and of the Council concerning the placing of plant protection products on the market, and amending the Annex to Commission Implementing Regulation (EU) No 540/2011</p> <p>(Text with EEA relevance)</p> </longTitle> </preface>
Text:
**COMMISSION IMPLEMENTING REGULATION (EU) 2017/...**
---
**of 10 February 2017**
Text: Commission Implementing Regulation (EU) 2019/... of 20 February 2019 correcting certain language versions of Implementing Regulation (EU) 2016/2286 laying down detailed rules on the application of fair use policy and on the methodology for assessing the sustainability of the abolition of retail roaming surcharges and on the application to be submitted by a roaming provider for the purposes of that assessment **(Text with EEA relevance)
Doc Number: 2019/296
Xml: <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2019/296</docNumber></span> </p> <p>of<docDate date="2019-02-20">20 February 2019</docDate></p> <p>correcting certain language versions of Implementing Regulation (EU) 2016/2286 laying down detailed rules on the application of fair use policy and on the methodology for assessing the sustainability of the abolition of retail roaming surcharges and on the application to be submitted by a roaming provider for the purposes of that assessment</p> <p>(Text with EEA relevance)</p> </longTitle> </preface>
**concerning the non-renewal of approval of the active substance
linuron, in accordance with Regulation (EC) No 1107/2009 of the European
Parliament and of the Council concerning the placing of plant protection
products on the market, and amending the Annex to Commission
Implementing Regulation (EU) No 540/2011**
---
**(Text with EEA relevance)
Text: Commission Implementing Regulation (EU) 2019/\... of 7 February 2019 correcting certain language versions of Implementing Regulation (EU) 2017/2330 concerning the authorisation of Iron(II) carbonate, Iron(III) chloride hexahydrate, Iron(II) sulphate monohydrate, Iron(II) sulphate heptahydrate, Iron(II) fumarate, Iron(II) chelate of amino acids hydrate, Iron(II) chelate of protein hydrolysates and Iron(II) chelate of glycine hydrate as feed additives for all animal species and of Iron dextran as feed additive for piglets and amending Regulations (EC) No 1334/2003 and (EC) No 479/2006 (Text with EEA relevance)
Doc Number: 2019/230
Xml: <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2019/230</docNumber></span> </p> <p>of<docDate date="2019-02-07">7 February 2019</docDate></p> <p>correcting certain language versions of Implementing Regulation (EU) 2017/2330 concerning the authorisation of Iron(II) carbonate, Iron(III) chloride hexahydrate, Iron(II) sulphate monohydrate, Iron(II) sulphate heptahydrate, Iron(II) fumarate, Iron(II) chelate of amino acids hydrate, Iron(II) chelate of protein hydrolysates and Iron(II) chelate of glycine hydrate as feed additives for all animal species and of Iron dextran as feed additive for piglets and amending Regulations (EC) No 1334/2003 and (EC) No 479/2006</p> <p>(Text with EEA relevance)</p> </longTitle> </preface>
Doc Number: 2017/244
---
Please provide the output fields Reasoning then Xml. Do so immediately, without additional content before or after, and precisely as the format above shows. Begin with the field Reasoning.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
Text: Commission Implementing Regulation (EU) 2016/... of 15 June 2016 amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances 2,4-DB, beta-cyfluthrin, carfentrazone ethyl, *Coniothyrium minitans* Strain CON/M/91-08 (DSM 9660), cyazofamid, deltamethrin, dimethenamid-P, ethofumesate, fenamidone, flufenacet, flurtamone, foramsulfuron, fosthiazate, imazamox, iodosulfuron, iprodione, isoxaflutole, linuron, maleic hydrazide, mesotrione, oxasulfuron, pendimethalin, picoxystrobin, silthiofam and trifloxystrobin **(Text with EEA relevance)
Doc Number: 2016/950
Xml: <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2016/950</docNumber></span> </p> <p>of<docDate date="2016-06-15">15 June 2016</docDate></p> <p>amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances 2,4-DB, beta-cyfluthrin, carfentrazone ethyl,<span class="ITALIC">Coniothyrium minitans</span>Strain CON/M/91-08 (DSM 9660), cyazofamid, deltamethrin, dimethenamid-P, ethofumesate, fenamidone, flufenacet, flurtamone, foramsulfuron, fosthiazate, imazamox, iodosulfuron, iprodione, isoxaflutole, linuron, maleic hydrazide, mesotrione, oxasulfuron, pendimethalin, picoxystrobin, silthiofam and trifloxystrobin</p> <p>(Text with EEA relevance)</p> </longTitle> </preface>
Reasoning: Let's think step by step in order to produce the xml. We need to identify the elements and attributes that make up the preface section in the Akoma Ntoso (AKN) format. The preface section typically includes a long title with a document type and number, and a date. We will use the provided text to populate these elements and attributes.
---
Xml:
<preface>
Text: Commission Implementing Regulation (EU) 2020/\... of 19 December 2019 approving non-minor amendments to the specification for a name entered in the register of protected designations of origin and protected geographical indications ('Huile d'olive de la Vallée des Baux-de-Provence' (PDO))
Doc Number: 2020/7
Xml: <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2020/7</docNumber></span> </p> <p>of<docDate date="2019-12-19">19 December 2019</docDate></p> <p>approving non-minor amendments to the specification for a name entered in the register of protected designations of origin and protected geographical indications ( "Huile d’olive de la Vallée des Baux-de-Provence" (PDO))</p> </longTitle> </preface>
<longTitle>
---
<p>
Text: Commission Implementing Regulation (EU) 2019/... of 14 February 2019 amending Council Implementing Regulation (EU) No 430/2013 imposing a definitive anti-dumping duty and collecting definitively the provisional duty imposed on imports of threaded tube or pipe cast fittings, of malleable cast iron, originating in the People's Republic of China and Thailand and terminating the proceeding with regard to Indonesia
<span>COMMISSION IMPLEMENTING REGULATION (EU) 2017/244</span>
Doc Number: 2019/262
</p>
Reasoning: Let's think step by step in order to produce the XML. We will start by identifying the key components of the preface section, which includes the title, document type, document number, and date. We will then format these components according to the Akoma Ntoso (AKN) XML structure.
<p>of<docDate date="2017-02-10">10 February 2017</docDate></p>
1. **Identify the Title**: The title is "Commission Implementing Regulation (EU) 2019/...".
2. **Document Type**: The document type is "Regulation".
3. **Document Number**: The document number is "2019/262".
4. **Document Date**: The date is "14 February 2019".
5. **Content**: The content of the preface includes the full description of the regulation.
<p>concerning the non-renewal of approval of the active substance linuron, in accordance with Regulation (EC) No 1107/2009 of the European Parliament and of the Council concerning the placing of plant protection products on the market, and amending the Annex to Commission Implementing Regulation (EU) No 540/2011</p>
Now, we will structure this information into the AKN XML format.
</longTitle>
Xml: ```xml
<preface>
<longTitle>
<p>
<span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2019/262</docNumber></span>
</p>
<p>of<docDate date="2019-02-14">14 February 2019</docDate></p>
<p>amending Council Implementing Regulation (EU) No 430/2013 imposing a definitive anti-dumping duty and collecting definitively the provisional duty imposed on imports of threaded tube or pipe cast fittings, of malleable cast iron, originating in the People's Republic of China and Thailand and terminating the proceeding with regard to Indonesia</p>
</longTitle>
</preface>
```
</preface>
'\n\n\nCreate an XML representation of a document preface section in the Akoma Ntoso (AKN) format. Update the docNumber tag (if provided) with the provided doc_number.\n\n---\n\nFollow the following format.\n\nText: Raw text format of the document prefece section\n\nDoc Number: Document number to be included in the XML representation\n\nReasoning: Let\'s think step by step in order to ${produce the xml}. We ...\n\nXml: Akoma Ntoso (AKN) XML representation of the input preface\n\n---\n\nText: COMMISSION IMPLEMENTING REGULATION (EU) 2019/... of 31 January 2019 amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances abamectin, *Bacillus subtilis* (Cohn 1872) Strain QST 713, *Bacillus thuringiensis* subsp. Aizawai, *Bacillus thuringiensis* subsp. israeliensis, *Bacillus thuringiensis* subsp. kurstaki, *Beauveria bassiana*, benfluralin, clodinafop, clopyralid, *Cydia pomonella Granulovirus* (CpGV), cyprodinil, dichlorprop-P, epoxiconazole, fenpyroximate, fluazinam, flutolanil, fosetyl, *Lecanicillium muscarium*, mepanipyrim, mepiquat, *Metarhizium anisopliae* var. Anisopliae, metconazole, metrafenone, *Phlebiopsis gigantea*, pirimicarb, *Pseudomonas chlororaphis* strain: MA 342, pyrimethanil*, Pythium oligandrum*, rimsulfuron, spinosad, *Streptomyces* K61, thiacloprid, tolclofos-methyl, *Trichoderma asperellum*, *Trichoderma atroviride*, *Trichoderma gamsii*, *Trichoderma harzianum*, triclopyr, trinexapac, triticonazole, *Verticillium albo-atrum* and ziram (Text with EEA relevance)\n\nDoc Number: 2019/168\n\nReasoning: Let\'s think step by step in order to produce the XML. We will start by identifying the key components of the preface section. The preface will include a long title that consists of several paragraphs. The first paragraph will contain the title of the regulation, including the type of document and the document number. The second paragraph will include the date of the document. 
The third paragraph will contain the main content of the regulation. If there is any additional relevant text, it will be included in a separate paragraph. Now, let\'s construct the XML representation based on the provided text and document number.\n\nXml: ```xml <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2019/168</docNumber></span> </p> <p>of<docDate date="2019-01-31">31 January 2019</docDate></p> <p>amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances abamectin, <span class="ITALIC">Bacillus subtilis</span> (Cohn 1872) Strain QST 713, <span class="ITALIC">Bacillus thuringiensis</span> subsp. Aizawai, <span class="ITALIC">Bacillus thuringiensis</span> subsp. israeliensis, <span class="ITALIC">Bacillus thuringiensis</span> subsp. kurstaki, <span class="ITALIC">Beauveria bassiana</span>, benfluralin, clodinafop, clopyralid, <span class="ITALIC">Cydia pomonella Granulovirus</span> (CpGV), cyprodinil, dichlorprop-P, epoxiconazole, fenpyroximate, fluazinam, flutolanil, fosetyl, <span class="ITALIC">Lecanicillium muscarium</span>, mepanipyrim, mepiquat, <span class="ITALIC">Metarhizium anisopliae</span> var. 
Anisopliae, metconazole, metrafenone, <span class="ITALIC">Phlebiopsis gigantea</span>, pirimicarb, <span class="ITALIC">Pseudomonas chlororaphis</span> strain: MA 342, pyrimethanil, <span class="ITALIC">Pythium oligandrum</span>, rimsulfuron, spinosad, <span class="ITALIC">Streptomyces</span> K61, thiacloprid, tolclofos-methyl, <span class="ITALIC">Trichoderma asperellum</span>, <span class="ITALIC">Trichoderma atroviride</span>, <span class="ITALIC">Trichoderma gamsii</span>, <span class="ITALIC">Trichoderma harzianum</span>, triclopyr, trinexapac, triticonazole, <span class="ITALIC">Verticillium albo-atrum</span> and ziram</p> <p>(Text with EEA relevance)</p> </longTitle> </preface> ```\n\n---\n\nText: COMMISSION IMPLEMENTING REGULATION (EU) 2020/... of 8 December 2020 amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances 1-decanol, 1,4-dimethylnaphthalene, 6-benzyladenine, acequinocyl, *Adoxophyes orana granulovirus*, aluminium sulfate, amisulbrom, *Aureobasidium pullulans* (strains DSM 14940 and DSM 14941), azadirachtin, *Bacillus pumilus* QST 2808, benalaxyl-M, bixafen, bupirimate, *Candida oleophila* strain O, chlorantraniliprole, disodium phosphonate, dithianon, dodine, emamectin, flubendiamide, fluometuron, fluxapyroxad, flutriafol, hexythiazox, imazamox, ipconazole, isoxaben, L-ascorbic acid, lime sulphur, orange oil, *Paecilomyces fumosoroseus* strain FE 9901, pendimethalin, penflufen, penthiopyrad, potassium phosphonates, prosulfuron, *Pseudomonas sp*. strain DSMZ 13134, pyridalyl, pyriofenone, pyroxsulam, quinmerac, S-abscisic acid, sedaxane, sintofen, sodium silver thiosulfate, spinetoram, spirotetramat, *Streptomyces lydicus* strain WYEC 108, tau-fluvalinate, tebufenozide, tembotrione, thiencarbazone, valifenalate, zinc phosphide\n\nDoc Number: 2020/2007\n\nReasoning: Let\'s think step by step in order to produce the XML. 
We will start by identifying the key components of the preface section, which includes the title, document type, document number, and date. We will format these components according to the Akoma Ntoso (AKN) XML structure. 1. **Title**: The title begins with "Commission Implementing Regulation" followed by the document type and number. 2. **Date**: The date is provided in the format "day month year". 3. **Content**: The main content of the preface follows the title and date. Now, let\'s construct the XML representation based on the provided text and document number.\n\nXml: ```xml <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2020/2007</docNumber></span> </p> <p>of<docDate date="2020-12-08">8 December 2020</docDate></p> <p>amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances 1-decanol, 1,4-dimethylnaphthalene, 6-benzyladenine, acequinocyl, <span class="ITALIC">Adoxophyes orana granulovirus</span>, aluminium sulfate, amisulbrom, <span class="ITALIC">Aureobasidium pullulans</span> (strains DSM 14940 and DSM 14941), azadirachtin, <span class="ITALIC">Bacillus pumilus</span> QST 2808, benalaxyl-M, bixafen, bupirimate, <span class="ITALIC">Candida oleophila</span> strain O, chlorantraniliprole, disodium phosphonate, dithianon, dodine, emamectin, flubendiamide, fluometuron, fluxapyroxad, flutriafol, hexythiazox, imazamox, ipconazole, isoxaben, L-ascorbic acid, lime sulphur, orange oil, <span class="ITALIC">Paecilomyces fumosoroseus</span> strain FE 9901, pendimethalin, penflufen, penthiopyrad, potassium phosphonates, prosulfuron, <span class="ITALIC">Pseudomonas sp</span>. 
strain DSMZ 13134, pyridalyl, pyriofenone, pyroxsulam, quinmerac, S-abscisic acid, sedaxane, sintofen, sodium silver thiosulfate, spinetoram, spirotetramat, <span class="ITALIC">Streptomyces lydicus</span> strain WYEC 108, tau-fluvalinate, tebufenozide, tembotrione, thiencarbazone, valifenalate, zinc phosphide</p> </longTitle> </preface> ```\n\n---\n\nText: Commission Implementing Regulation (EU) 2018/... of 27 June 2018 amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances alpha-cypermethrin, beflubutamid, benalaxyl, benthiavalicarb, bifenazate, boscalid, bromoxynil, captan, carvone, chlorpropham, cyazofamid, desmedipham, dimethoate, dimethomorph, diquat, ethephon, ethoprophos, etoxazole, famoxadone, fenamidone, fenamiphos, flumioxazine, fluoxastrobin, folpet, foramsulfuron, formetanate, *Gliocladium catenulatum* strain: J1446, isoxaflutole, metalaxyl-m, methiocarb, methoxyfenozide, metribuzin, milbemectin, oxasulfuron, *Paecilomyces lilacinus* strain 251, phenmedipham, phosmet, pirimiphos-methyl, propamocarb, prothioconazole, pymetrozine and s-metolachlor **(Text with EEA relevance)\n\nDoc Number: 2018/917\n\nReasoning: Let\'s think step by step in order to produce the XML. We will start by identifying the key components of the preface section, which includes the title, document type, document number, and date. We will then format these components according to the Akoma Ntoso (AKN) XML structure. 1. **Identify the Title**: The title is "Commission Implementing Regulation (EU) 2018/... 
of 27 June 2018 amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances alpha-cypermethrin, beflubutamid, benalaxyl, benthiavalicarb, bifenazate, boscalid, bromoxynil, captan, carvone, chlorpropham, cyazofamid, desmedipham, dimethoate, dimethomorph, diquat, ethephon, ethoprophos, etoxazole, famoxadone, fenamidone, fenamiphos, flumioxazine, fluoxastrobin, folpet, foramsulfuron, formetanate, *Gliocladium catenulatum* strain: J1446, isoxaflutole, metalaxyl-m, methiocarb, methoxyfenozide, metribuzin, milbemectin, oxasulfuron, *Paecilomyces lilacinus* strain 251, phenmedipham, phosmet, pirimiphos-methyl, propamocarb, prothioconazole, pymetrozine and s-metolachlor (Text with EEA relevance)". 2. **Document Type**: The document type is "Regulation". 3. **Document Number**: The document number is "2018/917". 4. **Date**: The date is "27 June 2018". 5. **Construct the XML**: We will format these components into the AKN XML structure. 
Now, let\'s create the XML representation.\n\nXml: ```xml <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2018/917</docNumber></span> </p> <p>of<docDate date="2018-06-27">27 June 2018</docDate></p> <p>amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances alpha-cypermethrin, beflubutamid, benalaxyl, benthiavalicarb, bifenazate, boscalid, bromoxynil, captan, carvone, chlorpropham, cyazofamid, desmedipham, dimethoate, dimethomorph, diquat, ethephon, ethoprophos, etoxazole, famoxadone, fenamidone, fenamiphos, flumioxazine, fluoxastrobin, folpet, foramsulfuron, formetanate, <span class="ITALIC">Gliocladium catenulatum</span> strain: J1446, isoxaflutole, metalaxyl-m, methiocarb, methoxyfenozide, metribuzin, milbemectin, oxasulfuron, <span class="ITALIC">Paecilomyces lilacinus</span> strain 251, phenmedipham, phosmet, pirimiphos-methyl, propamocarb, prothioconazole, pymetrozine and s-metolachlor</p> <p>(Text with EEA relevance)</p> </longTitle> </preface> ```\n\n---\n\nText: Commission Delegated Regulation (EU) 2016/... of 18 February 2016 correcting Commission Delegated Regulation (EU) No 528/2014 supplementing Regulation (EU) No 575/2013 of the European Parliament and of the Council with regard to regulatory technical standards for non-delta risk of options in the standardised market risk approach and correcting Commission Delegated Regulation (EU) No 604/2014 supplementing Directive 2013/36/EU of the European Parliament and of the Council with regard to regulatory technical standards with respect to qualitative and appropriate quantitative criteria to identify categories of staff whose professional activities have a material impact on an institution\'s risk profile (Text with EEA relevance)\n\nDoc Number: 2016/861\n\nReasoning: Let\'s think step by step in order to produce the XML. 
We will start by identifying the key components of the preface section, which includes the title, document type, document number, and date. We will then format these components according to the Akoma Ntoso (AKN) XML structure. 1. **Identify the Title**: The title is "Commission Delegated Regulation (EU) 2016/...". 2. **Document Type**: The document type is "Regulation". 3. **Document Number**: The document number is "2016/861". 4. **Document Date**: The date is "18 February 2016". 5. **Content**: The content of the preface includes the full description of the regulation. Now, we will structure this information into the AKN XML format.\n\nXml: ```xml <preface> <longTitle> <p> <span>Commission Delegated<docType>Regulation</docType>(EU) No<docNumber>2016/861</docNumber></span> </p> <p>of<docDate date="2016-02-18">18 February 2016</docDate></p> <p>correcting Commission Delegated Regulation (EU) No 528/2014 supplementing Regulation (EU) No 575/2013 of the European Parliament and of the Council with regard to regulatory technical standards for non-delta risk of options in the standardised market risk approach and correcting Commission Delegated Regulation (EU) No 604/2014 supplementing Directive 2013/36/EU of the European Parliament and of the Council with regard to regulatory technical standards with respect to qualitative and appropriate quantitative criteria to identify categories of staff whose professional activities have a material impact on an institution\'s risk profile</p> <p>(Text with EEA relevance)</p> </longTitle> </preface> ```\n\n---\n\nText: Commission Implementing Regulation (EU) 2019/... 
of 17 May 2019 correcting the Greek language version of Implementing Regulation (EU) 2018/775 laying down rules for the application of Article 26(3) of Regulation (EU) No 1169/2011 of the European Parliament and of the Council on the provision of food information to consumers, as regards the rules for indicating the country of origin or place of provenance of the primary ingredient of a food (Text with EEA relevance)\nDoc Number: 2019/802\nXml: <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2019/802</docNumber></span> </p> <p>of<docDate date="2019-05-17">17 May 2019</docDate></p> <p>correcting the Greek language version of Implementing Regulation (EU) 2018/775 laying down rules for the application of Article 26(3) of Regulation (EU) No 1169/2011 of the European Parliament and of the Council on the provision of food information to consumers, as regards the rules for indicating the country of origin or place of provenance of the primary ingredient of a food</p> <p>(Text with EEA relevance)</p> </longTitle> </preface>\n\n---\n\nText: Commission Delegated Regulation (EU) 2019/... 
of 12 March 2019 supplementing Regulation (EU) No 1308/2013 of the European Parliament and of the Council as regards wine-growing areas where the alcoholic strength may be increased, authorised oenological practices and restrictions applicable to the production and conservation of grapevine products, the minimum percentage of alcohol for by-products and their disposal, and publication of OIV files\nDoc Number: 2019/934\nXml: <preface> <longTitle> <p> <span>Commission Delegated<docType>Regulation</docType>(EU) No<docNumber>2019/934</docNumber></span> </p> <p>of<docDate date="2019-03-12">12 March 2019</docDate></p> <p>supplementing Regulation (EU) No 1308/2013 of the European Parliament and of the Council as regards wine-growing areas where the alcoholic strength may be increased, authorised oenological practices and restrictions applicable to the production and conservation of grapevine products, the minimum percentage of alcohol for by-products and their disposal, and publication of OIV files</p> </longTitle> </preface>\n\n---\n\nText: COMMISSION REGULATION (EU) 2019/... 
of 29 November 2019 amending Regulation (EC) No 1126/2008 adopting certain international accounting standards in accordance with Regulation (EC) No 1606/2002 of the European Parliament and of the Council as regards International Accounting Standards 1, 8, 34, 37 and 38, International Financial Reporting Standards 2, 3 and 6, Interpretations 12, 19, 20 and 22 of the International Financial Reporting Interpretations Committee and Interpretation 32 of the Standing Interpretations Committee **(Text with EEA relevance)\nDoc Number: 2019/2075\nXml: <preface> <longTitle> <p> <span>Commission<docType>Regulation</docType>(EU) No<docNumber>2019/2075</docNumber></span> </p> <p>of<docDate date="2019-11-29">29 November 2019</docDate></p> <p>amending Regulation (EC) No 1126/2008 adopting certain international accounting standards in accordance with Regulation (EC) No 1606/2002 of the European Parliament and of the Council as regards International Accounting Standards 1, 8, 34, 37 and 38, International Financial Reporting Standards 2, 3 and 6, Interpretations 12, 19, 20 and 22 of the International Financial Reporting Interpretations Committee and Interpretation 32 of the Standing Interpretations Committee</p> <p>(Text with EEA relevance)</p> </longTitle> </preface>\n\n---\n\nText: Commission Implementing Regulation (EU) 2019/... 
of 13 February 2019 amending Implementing Regulation (EU) No 821/2014 laying down rules for the application of Regulation (EU) No 1303/2013 of the European Parliament and of the Council as regards detailed arrangements for the transfer and management of programme contributions, the reporting on financial instruments, technical characteristics of information and communication measures for operations and the system to record and store data\nDoc Number: 2019/255\nXml: <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2019/255</docNumber></span> </p> <p>of<docDate date="2019-02-13">13 February 2019</docDate></p> <p>amending Implementing Regulation (EU) No 821/2014 laying down rules for the application of Regulation (EU) No 1303/2013 of the European Parliament and of the Council as regards detailed arrangements for the transfer and management of programme contributions, the reporting on financial instruments, technical characteristics of information and communication measures for operations and the system to record and store data</p> </longTitle> </preface>\n\n---\n\nText: Commission Implementing Regulation (EU) 2018/... 
of 7 March 2018 concerning the authorisation of a preparation of 6-phytase, produced by *Aspergillus niger* (DSM 25770) as feed additive for chickens for fattening, chickens reared for laying, pigs for fattening, sows, minor porcine species for fattening or for reproduction, turkeys for fattening, turkeys reared for breeding, all other avian species (excluding laying birds) and weaned piglets (holder of the authorisation BASF SE) **(Text with EEA relevance)\nDoc Number: 2018/338\nXml: <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2018/338</docNumber></span> </p> <p>of<docDate date="2018-03-07">7 March 2018</docDate></p> <p>concerning the authorisation of a preparation of 6-phytase, produced by<span class="ITALIC">Aspergillus niger</span>(DSM 25770) as feed additive for chickens for fattening, chickens reared for laying, pigs for fattening, sows, minor porcine species for fattening or for reproduction, turkeys for fattening, turkeys reared for breeding, all other avian species (excluding laying birds) and weaned piglets (holder of the authorisation BASF SE)</p> <p>(Text with EEA relevance)</p> </longTitle> </preface>\n\n---\n\nText: COMMISSION IMPLEMENTING REGULATION (EU) 2020/... of 5 February 2020 concerning the authorisation of a preparation of muramidase produced by *Trichoderma reesei* DSM 32338 as a feed additive for turkeys for fattening, turkeys reared for breeding, chickens reared for breeding and other poultry species reared for breeding (holder of authorisation DSM Nutritional Products Ltd. represented in the Union by DSM Nutritional Products Sp. 
Z o.o) **(Text with EEA relevance)\nDoc Number: 2020/163\nXml: <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2020/163</docNumber></span> </p> <p>of<docDate date="2020-02-05">5 February 2020</docDate></p> <p>concerning the authorisation of a preparation of muramidase produced by<span class="ITALIC">Trichoderma reesei</span>DSM 32338 as a feed additive for turkeys for fattening, turkeys reared for breeding, chickens reared for breeding and other poultry species reared for breeding (holder of authorisation DSM Nutritional Products Ltd. represented in the Union by DSM Nutritional Products Sp. Z o.o)</p> <p>(Text with EEA relevance)</p> </longTitle> </preface>\n\n---\n\nText: Commission Implementing Regulation (EU) 2018/... of 12 March 2018 amending Implementing Regulation (EU) No 1419/2013 concerning the recognition of producer organisations and inter-branch organisations, the extension of the rules of producer organisations and inter-branch organisations and the publication of trigger prices as provided for by Regulation (EU) No 1379/2013 of the European Parliament and of the Council on the common organisation of the markets in fishery and aquaculture products\nDoc Number: 2018/390\nXml: <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2018/390</docNumber></span> </p> <p>of<docDate date="2018-03-12">12 March 2018</docDate></p> <p>amending Implementing Regulation (EU) No 1419/2013 concerning the recognition of producer organisations and inter-branch organisations, the extension of the rules of producer organisations and inter-branch organisations and the publication of trigger prices as provided for by Regulation (EU) No 1379/2013 of the European Parliament and of the Council on the common organisation of the markets in fishery and aquaculture products</p> </longTitle> </preface>\n\n---\n\nText: Commission Implementing Regulation (EU) 2019/... 
of 29 April 2019 approving the low-risk active substance ABE-IT 56 (components of lysate of *Saccharomyces cerevisiae* strain DDSF623), in accordance with Regulation (EC) No 1107/2009 of the European Parliament and of the Council concerning the placing of plant protection products on the market, and amending the Annex to Commission Implementing Regulation (EU) No 540/2011 (Text with EEA relevance)\nDoc Number: 2019/676\nXml: <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2019/676</docNumber></span> </p> <p>of<docDate date="2019-04-29">29 April 2019</docDate></p> <p>approving the low-risk active substance ABE-IT 56 (components of lysate of<span class="ITALIC">Saccharomyces cerevisiae</span>strain DDSF623), in accordance with Regulation (EC) No 1107/2009 of the European Parliament and of the Council concerning the placing of plant protection products on the market, and amending the Annex to Commission Implementing Regulation (EU) No 540/2011</p> <p>(Text with EEA relevance)</p> </longTitle> </preface>\n\n---\n\nText: Commission Implementing Regulation (EU) 2019/... 
of 20 February 2019 correcting certain language versions of Implementing Regulation (EU) 2016/2286 laying down detailed rules on the application of fair use policy and on the methodology for assessing the sustainability of the abolition of retail roaming surcharges and on the application to be submitted by a roaming provider for the purposes of that assessment **(Text with EEA relevance)\nDoc Number: 2019/296\nXml: <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2019/296</docNumber></span> </p> <p>of<docDate date="2019-02-20">20 February 2019</docDate></p> <p>correcting certain language versions of Implementing Regulation (EU) 2016/2286 laying down detailed rules on the application of fair use policy and on the methodology for assessing the sustainability of the abolition of retail roaming surcharges and on the application to be submitted by a roaming provider for the purposes of that assessment</p> <p>(Text with EEA relevance)</p> </longTitle> </preface>\n\n---\n\nText: Commission Implementing Regulation (EU) 2019/\\... 
of 7 February 2019 correcting certain language versions of Implementing Regulation (EU) 2017/2330 concerning the authorisation of Iron(II) carbonate, Iron(III) chloride hexahydrate, Iron(II) sulphate monohydrate, Iron(II) sulphate heptahydrate, Iron(II) fumarate, Iron(II) chelate of amino acids hydrate, Iron(II) chelate of protein hydrolysates and Iron(II) chelate of glycine hydrate as feed additives for all animal species and of Iron dextran as feed additive for piglets and amending Regulations (EC) No 1334/2003 and (EC) No 479/2006 (Text with EEA relevance)\nDoc Number: 2019/230\nXml: <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2019/230</docNumber></span> </p> <p>of<docDate date="2019-02-07">7 February 2019</docDate></p> <p>correcting certain language versions of Implementing Regulation (EU) 2017/2330 concerning the authorisation of Iron(II) carbonate, Iron(III) chloride hexahydrate, Iron(II) sulphate monohydrate, Iron(II) sulphate heptahydrate, Iron(II) fumarate, Iron(II) chelate of amino acids hydrate, Iron(II) chelate of protein hydrolysates and Iron(II) chelate of glycine hydrate as feed additives for all animal species and of Iron dextran as feed additive for piglets and amending Regulations (EC) No 1334/2003 and (EC) No 479/2006</p> <p>(Text with EEA relevance)</p> </longTitle> </preface>\n\n---\n\nText: Commission Implementing Regulation (EU) 2016/... 
of 15 June 2016 amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances 2,4-DB, beta-cyfluthrin, carfentrazone ethyl, *Coniothyrium minitans* Strain CON/M/91-08 (DSM 9660), cyazofamid, deltamethrin, dimethenamid-P, ethofumesate, fenamidone, flufenacet, flurtamone, foramsulfuron, fosthiazate, imazamox, iodosulfuron, iprodione, isoxaflutole, linuron, maleic hydrazide, mesotrione, oxasulfuron, pendimethalin, picoxystrobin, silthiofam and trifloxystrobin **(Text with EEA relevance)\nDoc Number: 2016/950\nXml: <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2016/950</docNumber></span> </p> <p>of<docDate date="2016-06-15">15 June 2016</docDate></p> <p>amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances 2,4-DB, beta-cyfluthrin, carfentrazone ethyl,<span class="ITALIC">Coniothyrium minitans</span>Strain CON/M/91-08 (DSM 9660), cyazofamid, deltamethrin, dimethenamid-P, ethofumesate, fenamidone, flufenacet, flurtamone, foramsulfuron, fosthiazate, imazamox, iodosulfuron, iprodione, isoxaflutole, linuron, maleic hydrazide, mesotrione, oxasulfuron, pendimethalin, picoxystrobin, silthiofam and trifloxystrobin</p> <p>(Text with EEA relevance)</p> </longTitle> </preface>\n\n---\n\nText: Commission Implementing Regulation (EU) 2020/\\... 
of 19 December 2019 approving non-minor amendments to the specification for a name entered in the register of protected designations of origin and protected geographical indications (\'Huile d\'olive de la Vallée des Baux-de-Provence\' (PDO))\nDoc Number: 2020/7\nXml: <preface> <longTitle> <p> <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2020/7</docNumber></span> </p> <p>of<docDate date="2019-12-19">19 December 2019</docDate></p> <p>approving non-minor amendments to the specification for a name entered in the register of protected designations of origin and protected geographical indications ( "Huile d’olive de la Vallée des Baux-de-Provence" (PDO))</p> </longTitle> </preface>\n\n---\n\nText: Commission Implementing Regulation (EU) 2019/... of 14 February 2019 amending Council Implementing Regulation (EU) No 430/2013 imposing a definitive anti-dumping duty and collecting definitively the provisional duty imposed on imports of threaded tube or pipe cast fittings, of malleable cast iron, originating in the People\'s Republic of China and Thailand and terminating the proceeding with regard to Indonesia\n\nDoc Number: 2019/262\n\nReasoning: Let\'s think step by step in order to\x1b[32m produce the XML. We will start by identifying the key components of the preface section, which includes the title, document type, document number, and date. We will then format these components according to the Akoma Ntoso (AKN) XML structure. \n\n1. **Identify the Title**: The title is "Commission Implementing Regulation (EU) 2019/...".\n2. **Document Type**: The document type is "Regulation".\n3. **Document Number**: The document number is "2019/262".\n4. **Document Date**: The date is "14 February 2019".\n5. 
**Content**: The content of the preface includes the full description of the regulation.\n\nNow, we will structure this information into the AKN XML format.\n\nXml: ```xml\n<preface>\n <longTitle>\n <p>\n <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2019/262</docNumber></span>\n </p>\n <p>of<docDate date="2019-02-14">14 February 2019</docDate></p>\n <p>amending Council Implementing Regulation (EU) No 430/2013 imposing a definitive anti-dumping duty and collecting definitively the provisional duty imposed on imports of threaded tube or pipe cast fittings, of malleable cast iron, originating in the People\'s Republic of China and Thailand and terminating the proceeding with regard to Indonesia</p>\n </longTitle>\n</preface>\n```\x1b[0m\n\n\n'
'\n\n\n<|begin_of_text|><|start_header_id|>user<|end_header_id|>Create an XML representation of a document preface section in the Akoma Ntoso (AKN) format. Update the docNumber tag (if provided) with the provided doc_number.\n\n---\n\nFollow the following format.\n\nText: Raw text format of the document prefece section\n\nDoc Number: Document number to be included in the XML representation\n\nReasoning: Let\'s think step by step in order to ${produce the xml}. We ...\n\nXml: Akoma Ntoso (AKN) XML representation of the input preface\n\n---\n\nText:\nCOMMISSION IMPLEMENTING REGULATION (EU) 2017/\\...\n\nof 11 November 2016\n\nlaying down implementing technical standards with regard to standard\nforms, templates and procedures for authorisation, review and evaluation\nof central securities depositories, for the cooperation between\nauthorities of the home Member State and the host Member State, for the\nconsultation of authorities involved in the authorisation to provide\nbanking-type ancillary services, for access involving central securities\ndepositories, and with regard to the format of the records to be\nmaintained by central securities depositories in accordance with\nRegulation (EU) No 909/2014 of the European Parliament and of the\nCouncil\n\n**(Text with EEA relevance)\n\nDoc Number: 2017/394\n\nReasoning: Let\'s think step by step in order to The provided text is a document preface section in the Akoma Ntoso (AKN) format. The docNumber tag should be updated with the provided doc_number. The longTitle element contains the title of the document, which includes the docType and docNumber. The docDate element is used to specify the date of the document. 
The text within the longTitle element provides a brief description of the document.\n\nXml:\n<preface>\n\n<longTitle>\n\n<p>\n\n<span>COMMISSION IMPLEMENTING REGULATION (EU) 2017/394</span>\n\n</p>\n\n<p>of<docDate date="2017-11-11">11 November 2016</docDate></p>\n\n<p>laying down implementing technical standards with regard to standard forms, templates and procedures for authorisation, review and evaluation of central securities depositories, for the cooperation between authorities of the home Member State and the host Member State, for the consultation of authorities involved in the authorisation to provide banking-type ancillary services, for access involving central securities depositories, and with regard to the format of the records to be maintained by central securities depositories in accordance with Regulation (EU) No 909/2014 of the European Parliament and of the Council</p>\n\n<p>(Text with EEA relevance)</p>\n\n</longTitle>\n\n</preface>\n\n---\n\nText:\nCommission Implementing Regulation (EU) 2017/...\n\nof 17 May 2017\n\namending Implementing Regulation (EU) No 540/2011 as regards the\nextension of the approval periods of the active substances\nalpha-cypermethrin, Ampelomyces quisqualis strain: aq 10, benalaxyl,\nbentazone, bifenazate, bromoxynil, carfentrazone ethyl, chlorpropham,\ncyazofamid, desmedipham, diquat, DPX KE 459 (flupyrsulfuron-methyl),\netoxazole, famoxadone, fenamidone, flumioxazine, foramsulfuron,\nGliocladium catenulatum strain: j1446, imazamox, imazosulfuron,\nisoxaflutole, laminarin, metalaxyl-m, methoxyfenozide, milbemectin,\noxasulfuron, pendimethalin, phenmedipham, pymetrozine, s-metolachlor,\nand trifloxystrobin\n\n(Text with EEA relevance)\n\nDoc Number: 2017/841\n\nReasoning: Let\'s think step by step in order to produce the xml. We need to identify the elements and attributes that make up the preface section in the Akoma Ntoso (AKN) format. 
The preface section typically includes a long title with a document type and number, and a date. We will use the provided text to populate these elements and attributes.\n\nXml:\n<preface>\n\n<longTitle>\n\n<p>\n\n<span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2017/841</docNumber></span>\n\n</p>\n\n<p>of<docDate date="2017-05-17">17 May 2017</docDate></p>\n\n<p>amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances alpha-cypermethrin, Ampelomyces quisqualis strain: aq 10, benalaxyl, bentazone, bifenazate, bromoxynil, carfentrazone ethyl, chlorpropham, cyazofamid, desmedipham, diquat, DPX KE 459 (flupyrsulfuron-methyl), etoxazole, famoxadone, fenamidone, flumioxazine, foramsulfuron, Gliocladium catenulatum strain: j1446, imazamox, imazosulfuron, isoxaflutole, laminarin, metalaxyl-m, methoxyfenozide, milbemectin, oxasulfuron, pendimethalin, phenmedipham, pymetrozine, s-metolachlor, and trifloxystrobin</p>\n\n</longTitle>\n\n</preface>\n\n---\n\nText:\n**COMMISSION IMPLEMENTING REGULATION (EU) 2017/...**\n\n**of 10 February 2017**\n\n**concerning the non-renewal of approval of the active substance\nlinuron, in accordance with Regulation (EC) No 1107/2009 of the European\nParliament and of the Council concerning the placing of plant protection\nproducts on the market, and amending the Annex to Commission\nImplementing Regulation (EU) No 540/2011**\n\n**(Text with EEA relevance)\n\nDoc Number: 2017/244\n\nPlease provide the output fields Reasoning then Xml. Do so immediately, without additional content before or after, and precisely as the format above shows. Begin with the field Reasoning.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n\x1b[32mReasoning: Let\'s think step by step in order to produce the xml. We need to identify the elements and attributes that make up the preface section in the Akoma Ntoso (AKN) format. 
The preface section typically includes a long title with a document type and number, and a date. We will use the provided text to populate these elements and attributes.\n\nXml:\n<preface>\n\n<longTitle>\n\n<p>\n\n<span>COMMISSION IMPLEMENTING REGULATION (EU) 2017/244</span>\n\n</p>\n\n<p>of<docDate date="2017-02-10">10 February 2017</docDate></p>\n\n<p>concerning the non-renewal of approval of the active substance linuron, in accordance with Regulation (EC) No 1107/2009 of the European Parliament and of the Council concerning the placing of plant protection products on the market, and amending the Annex to Commission Implementing Regulation (EU) No 540/2011</p>\n\n</longTitle>\n\n</preface>\x1b[0m\n\n\n'
%% Cell type:markdown id: tags:
### Other optimizers
%% Cell type:code id: tags:
``` python
from dspy.teleprompt import BootstrapFewShot, BootstrapFewShotWithRandomSearch
# Build the few-shot optimizer once, scored by validate_xml_rouge_score.
# A default-configured instance used to be created first and was overwritten
# on the very next line; only the explicitly configured one is kept.
teleprompter = BootstrapFewShotWithRandomSearch(
    metric=validate_xml_rouge_score,
    max_bootstrapped_demos=2,
    max_labeled_demos=2,
    num_candidate_programs=4,
)
# Compile the pipeline on the first 50 training examples only.
compiled_pipeline = teleprompter.compile(DocumentToXMLPipeline(), trainset=trainset[:50])
```
%% Cell type:code id: tags:
``` python
from dspy.teleprompt import MIPRO
# teleprompter = MIPRO(metric=validate_xml_rouge_score)
# kwargs = dict(num_threads=5, display_progress=True, display_table=0)
# compiled_pipeline = teleprompter.compile(DocumentToXMLPipeline(), trainset=trainset, num_trials=100, max_bootstrapped_demos=3, max_labeled_demos=5,eval_kwargs=kwargs)
```
%% Cell type:code id: tags:
``` python
import os
# Make sure both output folders exist before anything is written into them.
for prefaces_dir in ("data/prefaces", "../data/prefaces"):
    os.makedirs(prefaces_dir, exist_ok=True)

# Persist the compiled program under both locations.
# NOTE(review): the program is saved twice (./data and ../data, the latter with
# selected_llm in the file name) - confirm both copies are intended.
compiled_pipeline.save("data/prefaces/prefaces.prog.json")
compiled_pipeline.save(f"../data/prefaces/prefaces_{selected_llm}.prog.json")
```
%% Output
[('transform', Predict(StringSignature(text, doc_number -> rationale, xml
instructions='Create an XML representation of a document preface section in the Akoma Ntoso (AKN) format. Update the docNumber tag (if provided) with the provided doc_number.'
text = Field(annotation=str required=True json_schema_extra={'desc': 'Raw text format of the document prefece section', '__dspy_field_type': 'input', 'prefix': 'Text:'})
doc_number = Field(annotation=str required=True json_schema_extra={'desc': 'Document number to be included in the XML representation', '__dspy_field_type': 'input', 'prefix': 'Doc Number:'})
rationale = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${produce the xml}. We ...', '__dspy_field_type': 'output'})
xml = Field(annotation=str required=True json_schema_extra={'desc': 'Akoma Ntoso (AKN) XML representation of the input preface', '__dspy_field_type': 'output', 'prefix': 'Xml:'})
)))]
%% Cell type:code id: tags:
``` python
# Sample preface used to try out the compiled pipeline; split into adjacent
# literals (one per source line of the preface) purely for readability.
text = (
    "COMMISSION IMPLEMENTING REGULATION (EU) 2021/...\n\n"
    "of 13 January 2021\n\n"
    "amending and correcting Regulation (EC) No 1235/2009 laying down\n"
    "detailed rules for implementation of Council Regulation (EC) No 834/2008\n"
    "as regards the arrangements for imports of electrical products from third\n"
    "countries\n\n"
    "**(Text with EEA relevance)**\n\n"
)
```
%% Cell type:code id: tags:
``` python
# Run the compiled pipeline on the first sample preface.
# NOTE(review): only the text is passed; no document number is supplied here -
# confirm what the pipeline uses for doc_number in that case.
xml = compiled_pipeline(text)
```
%% Cell type:code id: tags:
``` python
# Bare expression: the notebook shows the value as this cell's output.
xml
```
%% Output
{'xml': '<preface>\n <longTitle>\n <p>\n <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2021/...</docNumber></span>\n </p>\n <p>of<docDate date="2021-01-13">13 January 2021</docDate></p>\n <p>amending and correcting Regulation (EC) No 1235/2009 laying down detailed rules for implementation of Council Regulation (EC) No 834/2008 as regards the arrangements for imports of electrical products from third countries</p>\n <p>(Text with EEA relevance)</p>\n </longTitle>\n</preface>'}
{'xml': '<preface>\n\n<longTitle>\n\n<p>\n\n<span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2021/...</docNumber></span>\n\n</p>\n\n<p>of<docDate date="2021-01-13">13 January 2021</docDate></p>\n\n<p>amending and correcting Regulation (EC) No 1235/2009 laying down detailed rules for implementation of Council Regulation (EC) No 834/2008 as regards the arrangements for imports of electrical products from third countries</p>\n\n<p>(Text with EEA relevance)</p>\n\n</longTitle>\n\n</preface>'}
%% Cell type:code id: tags:
``` python
# Second sample preface: a long title listing many active substances, with
# species names wrapped in asterisks (presumably markdown italics).
# NOTE(review): the asterisks around "Pythium oligandrum" are misplaced
# ("pyrimethanil*, Pythium oligandrum*") - presumably carried over from the
# source document; confirm before "fixing" the input.
text2 = """COMMISSION IMPLEMENTING REGULATION (EU) 2019/...
of 31 January 2019
amending Implementing Regulation (EU) No 540/2011 as regards the
extension of the approval periods of the active substances abamectin,
*Bacillus subtilis* (Cohn 1872) Strain QST 713, *Bacillus thuringiensis*
subsp. Aizawai, *Bacillus thuringiensis* subsp. israeliensis, *Bacillus
thuringiensis* subsp. kurstaki, *Beauveria bassiana*, benfluralin,
clodinafop, clopyralid, *Cydia pomonella Granulovirus* (CpGV),
cyprodinil, dichlorprop-P, epoxiconazole, fenpyroximate, fluazinam,
flutolanil, fosetyl, *Lecanicillium muscarium*, mepanipyrim, mepiquat,
*Metarhizium anisopliae* var. Anisopliae, metconazole, metrafenone,
*Phlebiopsis gigantea*, pirimicarb, *Pseudomonas chlororaphis* strain:
MA 342, pyrimethanil*, Pythium oligandrum*, rimsulfuron, spinosad,
*Streptomyces* K61, thiacloprid, tolclofos-methyl, *Trichoderma
asperellum*, *Trichoderma atroviride*, *Trichoderma gamsii*,
*Trichoderma harzianum*, triclopyr, trinexapac, triticonazole,
*Verticillium albo-atrum* and ziram
(Text with EEA relevance)"""
```
%% Cell type:code id: tags:
``` python
# import dspy
# selected_llm = llms["llama3.2.3b"]
# dspy.settings.configure(lm=selected_llm)
# compiled_pipeline = dspy.load("data/prefaces/prefaces2.prog.json")
```
%% Cell type:code id: tags:
``` python
# Run the compiled pipeline on the second, longer sample preface.
# NOTE(review): as with the first sample, no document number is passed -
# confirm where the docNumber in the result comes from.
xml2 = compiled_pipeline(text2)
```
%% Cell type:code id: tags:
``` python
# Bare expression: the notebook shows the value as this cell's output.
xml2
```
%% Output
{'xml': '<preface>\n <longTitle>\n <p>\n <span>Commission Implementing<docType>Regulation</docType>(EU) No<docNumber>2019/168</docNumber></span>\n </p>\n <p>of<docDate date="2019-01-31">31 January 2019</docDate></p>\n <p>amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances abamectin, <span class="ITALIC">Bacillus subtilis</span> (Cohn 1872) Strain QST 713, <span class="ITALIC">Bacillus thuringiensis</span> subsp. Aizawai, <span class="ITALIC">Bacillus thuringiensis</span> subsp. israeliensis, <span class="ITALIC">Bacillus thuringiensis</span> subsp. kurstaki, <span class="ITALIC">Beauveria bassiana</span>, benfluralin, clodinafop, clopyralid, <span class="ITALIC">Cydia pomonella Granulovirus</span> (CpGV), cyprodinil, dichlorprop-P, epoxiconazole, fenpyroximate, fluazinam, flutolanil, fosetyl, <span class="ITALIC">Lecanicillium muscarium</span>, mepanipyrim, mepiquat, <span class="ITALIC">Metarhizium anisopliae</span> var. Anisopliae, metconazole, metrafenone, <span class="ITALIC">Phlebiopsis gigantea</span>, pirimicarb, <span class="ITALIC">Pseudomonas chlororaphis</span> strain: MA 342, pyrimethanil, <span class="ITALIC">Pythium oligandrum</span>, rimsulfuron, spinosad, <span class="ITALIC">Streptomyces</span> K61, thiacloprid, tolclofos-methyl, <span class="ITALIC">Trichoderma asperellum</span>, <span class="ITALIC">Trichoderma atroviride</span>, <span class="ITALIC">Trichoderma gamsii</span>, <span class="ITALIC">Trichoderma harzianum</span>, triclopyr, trinexapac, triticonazole, <span class="ITALIC">Verticillium albo-atrum</span> and ziram</p>\n <p>(Text with EEA relevance)</p>\n </longTitle>\n</preface>'}
{'xml': '<preface>\n\n<longTitle>\n\n<p>\n\n<span>COMMISSION IMPLEMENTING REGULATION (EU) 2019/394</span>\n\n</p>\n\n<p>of<docDate date="2019-01-31">31 January 2019</docDate></p>\n\n<p>amending Implementing Regulation (EU) No 540/2011 as regards the extension of the approval periods of the active substances abamectin, *Bacillus subtilis* (Cohn 1872) Strain QST 713, *Bacillus thuringiensis* subsp. Aizawai, *Bacillus thuringiensis* subsp. israeliensis, *Bacillus thuringiensis* subsp. kurstaki, *Beauveria bassiana*, benfluralin, clodinafop, clopyralid, *Cydia pomonella Granulovirus* (CpGV), cyprodinil, dichlorprop-P, epoxiconazole, fenpyroximate, fluazinam, flutolanil, fosetyl, *Lecanicillium muscarium*, mepanipyrim, mepiquat, *Metarhizium anisopliae* var. Anisopliae, metconazole, metrafenone, *Phlebiopsis gigantea*, pirimicarb, *Pseudomonas chlororaphis* strain: MA 342, pyrimethanil*, Pythium oligandrum*, rimsulfuron, spinosad, *Streptomyces* K61, thiacloprid, tolclofos-methyl, *Trichoderma asperellum*, *Trichoderma atroviride*, *Trichoderma gamsii*, *Trichoderma harzianum*, triclopyr, trinexapac, triticonazole, *Verticillium albo-atrum* and ziram</p>\n\n</longTitle>\n\n</preface>'}
%% Cell type:markdown id: tags:
### K-fold cross validation
%% Cell type:code id: tags:
``` python
# TODO: refactor
from sklearn.model_selection import KFold
def k_fold_cross_validation(
    model_class,
    data,
    k=5,
    metric_function=None,
    *,
    num_threads=4,
    random_state=42,
):
    """Score a model with shuffled k-fold cross-validation.

    For every fold, a fresh ``model_class()`` is compiled with
    ``BootstrapFewShot`` on the training split and then scored with
    ``Evaluate`` on the held-out split. The per-fold results are averaged
    without weighting by fold size.

    Args:
        model_class: Zero-argument callable that returns the program to
            compile; it is called once per fold so folds do not share state.
        data: Examples to split. Must support ``len()`` and integer indexing.
        k: Number of folds.
        metric_function: Metric handed to both ``BootstrapFewShot`` and
            ``Evaluate``.
        num_threads: Number of threads ``Evaluate`` uses (keyword-only;
            defaults to the previously hard-coded value of 4).
        random_state: Seed for the shuffled fold assignment (keyword-only;
            defaults to the previously hard-coded value of 42).

    Returns:
        The arithmetic mean of the per-fold evaluation results.

    Raises:
        ValueError: Propagated from ``KFold`` when ``k`` is not a valid
            number of splits for ``data`` (e.g. more folds than examples).
    """
    kf = KFold(n_splits=k, shuffle=True, random_state=random_state)
    results = []

    for train_index, test_index in kf.split(data):
        # Index element by element so ``data`` may be any indexable sequence,
        # not only one that supports array-style indexing.
        train_data = [data[i] for i in train_index]
        test_data = [data[i] for i in test_index]

        # Compile a brand-new model on this fold's training split.
        optimizer = BootstrapFewShot(metric=metric_function)
        compiled_model = optimizer.compile(model_class(), trainset=train_data)

        # Score the compiled model on this fold's held-out split.
        evaluate = Evaluate(
            devset=test_data,
            metric=metric_function,
            num_threads=num_threads,
            display_progress=True,
            display_table=0,
        )
        results.append(evaluate(compiled_model))

    # Unweighted mean across folds.
    return sum(results) / len(results)
```
%% Cell type:code id: tags:
``` python
# Run 10-fold cross-validation of DocumentToXMLPipeline, scored with
# validate_xml_rouge_score, on the first 100 training examples only.
trainset_subset = trainset[:100]

average_metric = k_fold_cross_validation(
    DocumentToXMLPipeline,
    trainset_subset,
    k=10,
    metric_function=validate_xml_rouge_score,
)

# Report the mean score over all folds.
print("Average Metric across folds (using 100 examples):", average_metric)
```
%% Output
Average Metric: 6 / 10 (60.0): 100%|██████████| 10/10 [00:11<00:00, 1.12s/it]
2%|▏ | 2/90 [00:09<07:01, 4.79s/it]
rougeL_f1 0.8577981651376146
----------------------------------------------------------------------------------------------------
REGULATION (EU) 2017/...
OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL
of 15 March 2017
on official controls and other official activities performed to ensure
the application of food and feed law, rules on animal health and
welfare, plant health and plant protection products, amending
Regulations (EC) No 999/2001, (EC) No 396/2005, (EC) No 1069/2009, (EC)
No 1107/2009, (EU) No 1151/2012, (EU) No 652/2014, (EU) 2016/429 and
(EU) 2016/2031 of the European Parliament and of the Council, Council
Regulations (EC) No 1/2005 and (EC) No 1099/2009 and Council
Directives 98/58/EC, 1999/74/EC, 2007/43/EC, 2008/119/EC
and 2008/120/EC, and repealing Regulations (EC) No 854/2004 and (EC)
No 882/2004 of the European Parliament and of the Council, Council
Directives 89/608/EEC, 89/662/EEC, 90/425/EEC, 91/496/EEC, 96/23/EC,
96/93/EC and 97/78/EC and Council Decision 92/438/EEC (Official Controls
Regulation)
(Text with EEA relevance)
<preface>
<longTitle>
<p>
<span>
<docType>Regulation</docType>
(EU) No
<docNumber>2017/625</docNumber>
</span>
of the European Parliament and of the Council
</p>
<p>
of
<docDate date="2017-03-15">15 March 2017</docDate>
</p>
<p>on official controls and other official activities performed to ensure the application of food and feed law, rules on animal health and welfare, plant health and plant protection products, amending Regulations (EC) No 999/2001, (EC) No 396/2005, (EC) No 1069/2009, (EC) No 1107/2009, (EU) No 1151/2012, (EU) No 652/2014, (EU) 2016/429 and (EU) 2016/2031 of the European Parliament and of the Council, <ref href="/akn/eu/act/regulation/ep/2005/1/">Council Regulations (EC) No 1/2005 </ref>and <ref href="/akn/eu/act/regulation/ep/2009/1099/">(EC) No 1099/2009</ref>and Council Directives 98/58/EC, 1999/74/EC, 2007/43/EC, 2008/119/EC and 2008/120/EC, and repealing <ref href="/akn/eu/act/regulation/ep/2004/854/">Regulations (EC) No 854/2004 </ref>and <ref href="/akn/eu/act/regulation/ep/2004/882/">(EC) No 882/2004</ref>of the European Parliament and of the Council, Council Directives 89/608/EEC, 89/662/EEC, 90/425/EEC, 91/496/EEC, 96/23/EC, 96/93/EC and 97/78/EC and Council <ref href="/akn/eu/act/decision/ep/1992/438/">Decision 92/438/EEC </ref>(Official Controls Regulation)</p>
<p>(Text with EEA relevance)</p>
</longTitle>
</preface>
<preface>
<longTitle>
<p>
<span>REGULATION<docType>Regulation</docType>(EU) No<docNumber>2017/625</docNumber></span>
</p>
<p>of<docDate date="2017-03-15">15 March 2017</docDate></p>
<p>on official controls and other official activities performed to ensure the application of food and feed law, rules on animal health and welfare, plant health and plant protection products, amending Regulations (EC) No 999/2001, (EC) No 396/2005, (EC) No 1069/2009, (EC) No 1107/2009, (EU) No 1151/2012, (EU) No 652/2014, (EU) 2016/429 and (EU) 2016/2031 of the European Parliament and of the Council, Council Regulations (EC) No 1/2005 and (EC) No 1099/2009 and Council Directives 98/58/EC, 1999/74/EC, 2007/43/EC, 2008/119/EC and 2008/120/EC, and repealing Regulations (EC) No 854/2004 and (EC) No 882/2004 of the European Parliament and of the Council, Council Directives 89/608/EEC, 89/662/EEC, 90/425/EEC, 91/496/EEC, 96/23/EC, 96/93/EC and 97/78/EC and Council Decision 92/438/EEC</p>
<p>(Text with EEA relevance)</p>
</longTitle>
</preface>
----------------------------------------------------------------------------------------------------
6%|▌ | 5/90 [00:23<06:34, 4.64s/it]
Bootstrapped 4 full traces after 6 examples in round 0.
Average Metric: 1 / 2 (50.0): 20%|██ | 2/10 [00:04<00:14, 1.80s/it]
rougeL_f1 0.7101449275362319
----------------------------------------------------------------------------------------------------
Commission Delegated Regulation (EU) 2016/...
of 18 May 2016
supplementing Regulation (EU) No 1308/2013 of the European Parliament
and of the Council with regard to the rules for applying the system of
import and export licences and supplementing Regulation (EU) No
1306/2013 of the European Parliament and of the Council with regard to
the rules on the release and forfeit of securities lodged for such
licences, amending Commission Regulations (EC) No 2535/2001, (EC) No
1342/2003, (EC) No 2336/2003, (EC) No 951/2006, (EC) No 341/2007 and
(EC) No 382/2008 and repealing Commission Regulations (EC) No 2390/98,
(EC) No 1345/2005, (EC) No 376/2008 and (EC) No 507/2008
**(Text with EEA relevance)
<preface>
<longTitle>
<p>
<span>
Commission Delegated
<docType>Regulation</docType>
(EU) No
<docNumber>2016/1237</docNumber>
</span>
</p>
<p>
of
<docDate date="2016-05-18">18 May 2016</docDate>
</p>
<p>supplementing <ref href="/akn/eu/act/regulation/ep/2013/1308/">Regulation (EU) No 1308/2013 </ref>of the European Parliament and of the Council with regard to the rules for applying the system of import and export licences and supplementing <ref href="/akn/eu/act/regulation/ep/2013/1306/">Regulation (EU) No 1306/2013 </ref>of the European Parliament and of the Council with regard to the rules on the release and forfeit of securities lodged for such licences, amending Commission Regulations <ref href="/akn/eu/act/regulation/ep/2001/2535/">(EC) No 2535/2001</ref> <ref href="/akn/eu/act/regulation/ep/2003/1342/">(EC) No 1342/2003</ref> <ref href="/akn/eu/act/regulation/ep/2003/2336/">(EC) No 2336/2003</ref> <ref href="/akn/eu/act/regulation/ep/2006/951/">(EC) No 951/2006</ref> <ref href="/akn/eu/act/regulation/ep/2007/341/">(EC) No 341/2007</ref>and <ref href="/akn/eu/act/regulation/ep/2008/382/">(EC) No 382/2008</ref>and repealing Commission Regulations <ref href="/akn/eu/act/regulation/ep/1998/2390/">(EC) No 2390/98</ref> <ref href="/akn/eu/act/regulation/ep/2005/1345/">(EC) No 1345/2005</ref> <ref href="/akn/eu/act/regulation/ep/2008/376/">(EC) No 376/2008</ref>and <ref href="/akn/eu/act/regulation/ep/2008/507/">(EC) No 507/2008</ref></p>
<p>(Text with EEA relevance)</p>
</longTitle>
</preface>
<preface>
<longTitle>
<p>
<span>Commission Delegated<docType>Regulation</docType>(EU) No<docNumber>2016/1237</docNumber></span>
</p>
<p>of<docDate date="2016-05-18">18 May 2016</docDate></p>
<p>supplementing Regulation (EU) No 1308/2013 of the European Parliament and of the Council with regard to the rules for applying the system of import and export licences and supplementing Regulation (EU) No 1306/2013 of the European Parliament and of the Council with regard to the rules on the release and forfeit of securities lodged for such licences, amending Commission Regulations (EC) No 2535/2001, (EC) No 1342/2003, (EC) No 2336/2003, (EC) No 951/2006, (EC) No 341/2007 and (EC) No 382/2008 and repealing Commission Regulations (EC) No 2390/98, (EC) No 1345/2005, (EC) No 376/2008 and (EC) No 507/2008</p>
<p>(Text with EEA relevance)</p>
</longTitle>
</preface>
----------------------------------------------------------------------------------------------------
Average Metric: 6 / 10 (60.0): 100%|██████████| 10/10 [00:11<00:00, 1.18s/it]
4%|▍ | 4/90 [00:25<08:49, 6.16s/it]
rougeL_f1 0.860411899313501
----------------------------------------------------------------------------------------------------
REGULATION (EU) 2017/...
OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL
of 15 March 2017
on official controls and other official activities performed to ensure
the application of food and feed law, rules on animal health and
welfare, plant health and plant protection products, amending
Regulations (EC) No 999/2001, (EC) No 396/2005, (EC) No 1069/2009, (EC)
No 1107/2009, (EU) No 1151/2012, (EU) No 652/2014, (EU) 2016/429 and
(EU) 2016/2031 of the European Parliament and of the Council, Council
Regulations (EC) No 1/2005 and (EC) No 1099/2009 and Council
Directives 98/58/EC, 1999/74/EC, 2007/43/EC, 2008/119/EC
and 2008/120/EC, and repealing Regulations (EC) No 854/2004 and (EC)
No 882/2004 of the European Parliament and of the Council, Council
Directives 89/608/EEC, 89/662/EEC, 90/425/EEC, 91/496/EEC, 96/23/EC,
96/93/EC and 97/78/EC and Council Decision 92/438/EEC (Official Controls
Regulation)
(Text with EEA relevance)
<preface>
<longTitle>
<p>
<span>
<docType>Regulation</docType>
(EU) No
<docNumber>2017/625</docNumber>
</span>
of the European Parliament and of the Council
</p>
<p>
of
<docDate date="2017-03-15">15 March 2017</docDate>
</p>
<p>on official controls and other official activities performed to ensure the application of food and feed law, rules on animal health and welfare, plant health and plant protection products, amending Regulations (EC) No 999/2001, (EC) No 396/2005, (EC) No 1069/2009, (EC) No 1107/2009, (EU) No 1151/2012, (EU) No 652/2014, (EU) 2016/429 and (EU) 2016/2031 of the European Parliament and of the Council, <ref href="/akn/eu/act/regulation/ep/2005/1/">Council Regulations (EC) No 1/2005 </ref>and <ref href="/akn/eu/act/regulation/ep/2009/1099/">(EC) No 1099/2009</ref>and Council Directives 98/58/EC, 1999/74/EC, 2007/43/EC, 2008/119/EC and 2008/120/EC, and repealing <ref href="/akn/eu/act/regulation/ep/2004/854/">Regulations (EC) No 854/2004 </ref>and <ref href="/akn/eu/act/regulation/ep/2004/882/">(EC) No 882/2004</ref>of the European Parliament and of the Council, Council Directives 89/608/EEC, 89/662/EEC, 90/425/EEC, 91/496/EEC, 96/23/EC, 96/93/EC and 97/78/EC and Council <ref href="/akn/eu/act/decision/ep/1992/438/">Decision 92/438/EEC </ref>(Official Controls Regulation)</p>
<p>(Text with EEA relevance)</p>
</longTitle>
</preface>
<preface>
<longTitle>
<p>
<span>REGULATION <docType>Regulation</docType>(EU) No<docNumber>2017/625</docNumber></span>
</p>
<p>of<docDate date="2017-03-15">15 March 2017</docDate></p>
<p>on official controls and other official activities performed to ensure the application of food and feed law, rules on animal health and welfare, plant health and plant protection products, amending Regulations (EC) No 999/2001, (EC) No 396/2005, (EC) No 1069/2009, (EC) No 1107/2009, (EU) No 1151/2012, (EU) No 652/2014, (EU) 2016/429 and (EU) 2016/2031 of the European Parliament and of the Council, Council Regulations (EC) No 1/2005 and (EC) No 1099/2009 and Council Directives 98/58/EC, 1999/74/EC, 2007/43/EC, 2008/119/EC and 2008/120/EC, and repealing Regulations (EC) No 854/2004 and (EC) No 882/2004 of the European Parliament and of the Council, Council Directives 89/608/EEC, 89/662/EEC, 90/425/EEC, 91/496/EEC, 96/23/EC, 96/93/EC and 97/78/EC and Council Decision 92/438/EEC (Official Controls Regulation) (Text with EEA relevance)</p>
</longTitle>
</preface>
----------------------------------------------------------------------------------------------------
6%|▌ | 5/90 [00:29<08:16, 5.84s/it]
Bootstrapped 4 full traces after 6 examples in round 0.
Average Metric: 9 / 10 (90.0): 100%|██████████| 10/10 [00:12<00:00, 1.25s/it]
Average Metric across folds (using 100 examples): 83.0
......
source diff could not be displayed: it is too large. Options to address this: view the blob.