### Install necessary packages
LangChain supports many LLM inference providers, including Fireworks.

In [None]:
!pip install langchain
!pip install python-dotenv
!pip install langchain-fireworks

In [21]:
import os
from dotenv import load_dotenv
from langchain.chains import LLMChain
from langchain_core.output_parsers import StrOutputParser
from langchain_fireworks import Fireworks 

# Read key/value pairs from a local .env file so that FIREWORKS_API_KEY can be
# picked up from the environment further down (the call returns True when a
# .env file was found and loaded).
load_dotenv()

True

### API KEY
* Register and get an API key from: https://fireworks.ai/api-keys
* Put the key in a `.env` file, in the `FIREWORKS_API_KEY` variable

In [22]:
# Fireworks API key taken from the environment; None when the variable is unset
api_key = os.environ.get("FIREWORKS_API_KEY")

In [23]:
# Upper bound on the number of tokens the model may generate,
# looked up by shot count (0-, 1- and 2-shot prompts)
max_tokens = {
    0: 1000,
    1: 1000,
    2: 2000,
}

### Prompting Models

In [24]:
# Fully qualified Fireworks model identifiers to run each prompt against
models = [
    f'accounts/fireworks/models/{model_id}'
    for model_id in (
        'starcoder-7b',
        'starcoder-16b',
        'llama-v2-13b-code-instruct',
        'llama-v2-34b-code-instruct',
        'llama-v2-70b-code-instruct',
        'mixtral-8x7b-instruct',
    )
]

In [19]:
# Run every model against every k-shot prompt and store each raw completion in
# data/prompts/<k>-shot/results/<model>.fireworks.ai.txt.
# Result files that already exist are treated as a cache and are not regenerated.
shots = [0, 1, 2]

for shot in shots:
    print(f'Processing shot: {shot}')

    base_path = f'data/prompts/{shot}-shot'

    # Read the prompt through a context manager so the handle is always closed
    with open(f'{base_path}/prompt.txt', 'r') as prompt_file:
        prompt = prompt_file.read()

    # The results directory only depends on the shot, so prepare it once here
    # instead of re-checking it for every model
    results_dir = f'{base_path}/results'
    os.makedirs(results_dir, exist_ok=True)

    for model in models:
        model_name = model.split('/')[-1]
        print(f'Processing model: {model_name}')

        file_path = f'{results_dir}/{model_name}.fireworks.ai.txt'

        # Check if the result file already exists
        if os.path.exists(file_path):
            print('Skipping...')
            continue

        # Token budget is looked up per shot count
        llm = Fireworks(
            fireworks_api_key=api_key,
            model=model,
            max_tokens=max_tokens[shot])
        result = llm.invoke(prompt)

        with open(file_path, 'w') as file:
            file.write(result)


Processing shot: 0
Processing model: starcoder-7b
Processing model: starcoder-16b
Processing model: llama-v2-13b-code-instruct
Processing model: llama-v2-34b-code-instruct
Processing model: llama-v2-70b-code-instruct
Processing model: mixtral-8x7b-instruct
Processing shot: 1
Processing model: starcoder-7b
Processing model: starcoder-16b
Processing model: llama-v2-13b-code-instruct
Processing model: llama-v2-34b-code-instruct
Processing model: llama-v2-70b-code-instruct
Processing model: mixtral-8x7b-instruct
Processing shot: 2
Processing model: starcoder-7b
Processing model: starcoder-16b
Processing model: llama-v2-13b-code-instruct
Processing model: llama-v2-34b-code-instruct
Processing model: llama-v2-70b-code-instruct
Processing model: mixtral-8x7b-instruct


In [37]:
import requests
import json

def send_fireworks_chat_request(model, messages, api_key, max_tokens=4096,
                                temperature=0.6, top_p=1, top_k=40,
                                frequency_penalty=0,
                                presence_penalty=0,
                                timeout=300,
                                ):
    """POST a chat-completion request to the Fireworks REST API.

    Args:
        model: Fireworks model identifier, sent as the "model" field.
        messages: List of chat messages, sent unchanged as the "messages" field
            (the notebook passes dicts with "role" and "content" keys).
        api_key: Fireworks API key, sent as a Bearer token.
        max_tokens: Value of the "max_tokens" payload field.
        temperature: Value of the "temperature" payload field.
        top_p: Value of the "top_p" payload field.
        top_k: Value of the "top_k" payload field.
        frequency_penalty: Value of the "frequency_penalty" payload field.
        presence_penalty: Value of the "presence_penalty" payload field.
        timeout: Seconds passed to ``requests.post`` as its timeout. Without a
            timeout a stalled connection would block the kernel indefinitely.
            Pass ``None`` to wait forever.

    Returns:
        The decoded JSON body of the response. The HTTP status is not checked,
        so an error reply from the API is returned as its JSON body as well.

    Raises:
        requests.exceptions.Timeout: if the server does not answer in time.
        ValueError: if the response body is not valid JSON.
    """
    url = "https://api.fireworks.ai/inference/v1/chat/completions"
    payload = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty,
        "presence_penalty": presence_penalty,
        "top_k": top_k,
    }
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    return response.json()

In [None]:

# System turn: fixes the model's role as a plain-text -> AKN4EU XML converter.
system_message = """
You are AKN4EU XML formation converter. 
You receive plain text documents and convert them to XML. 
Return only the final XML document converted from the text.
The user gives some examples of XML documents.
"""

# One-shot example, user side: the plain-text cover page to convert.
user_message_1 = """
Convert plain text of following coverpage to AKN4EU XML format.


EUROPEAN COMMISSION
Brussels, 21.12.2016
2016/0411 (COD)
Proposal for a
REGULATION OF THE EUROPEAN PARLIAMENT AND OF THE
COUNCIL
amending Regulation (EC)
No 1008/2008 on common rules for the operation of air services in the
Community
EN
"""

# One-shot example, assistant side: the expected XML for user_message_1.
# (An earlier plain-text value assigned to this name was overwritten before
# ever being used, so it has been removed.)
assistant_message = """
<coverPage>
	<container name="logo">
		<p><img src="EC.png" alt="EUROPEAN COMMISSION"/></p>
	</container>
	<container name="actingEntity">
		<p><organization refersTo="~_COM">EUROPEAN COMMISSION</organization></p>
	</container>
	<container name="mainDoc">
		<block name="placeAndDate">
			<location refersTo="~_BEL_BRU">Brussels</location>, <date date="2016-12-21">21.12.2016</date>
		</block>
	</container>
	<container name="procedureIdentifier">
		<p><docketNumber refersTo="~_procedure_2016_411">2016/0411 (COD)</docketNumber></p>
	</container>
	<longTitle>
		<p><docStage>Proposal for a</docStage>
			<docType refersTo="~_REG">REGULATION OF THE EUROPEAN PARLIAMENT AND OF THE
				COUNCIL</docType>
			<docPurpose>amending <ref href="http://data.europa.eu/eli/reg/2008/1008">Regulation (EC)
					No 1008/2008 on common rules for the operation of air services in the
				Community</ref></docPurpose></p>
	</longTitle>
	<container name="mainDocLanguage">
		<p><inline name="language" refersTo="~_FRBRlanguage">EN</inline></p>
	</container>
</coverPage>
"""

# The actual query: a new cover page the model has to convert.
user_message_2 = """
Convert plain text of following coverpage to AKN4EU XML format.

EUROPEAN COMMISSION
Brussels, 21.12.2017
2012/0412 (COD)
Proposal for a
REGULATION OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL amending Regulation (EC)
No 1009/2009 on common rules for the operation of air services in the Community
EN
"""

# Conversation sent to the chat endpoint: system, example pair, then the query.
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_message_1},
    {"role": "assistant", "content": assistant_message},
    {"role": "user", "content": user_message_2},
]

In [35]:
# Send the one-shot conversation to Mixtral and show the raw JSON reply
response = send_fireworks_chat_request(
    model="accounts/fireworks/models/mixtral-8x7b-instruct",
    messages=messages,
    api_key=api_key,
)
print(response)

{'id': '16132b15-b1f0-4b85-b330-be64b145fcbe', 'object': 'chat.completion', 'created': 1712076650, 'model': 'accounts/fireworks/models/mixtral-8x7b-instruct', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': '<coverPage>\n\t<container name="logo">\n\t\t<p><img src="EC.png" alt="EUROPEAN COMMISSION"/></p>\n\t</container>\n\t<container name="actingEntity">\n\t\t<p><organization refersTo="~_COM">EUROPEAN COMMISSION</organization></p>\n\t</container>\n\t<container name="mainDoc">\n\t\t<block name="placeAndDate">\n\t\t\t<location refersTo="~_BEL_BRU">Brussels</location>, <date date="2017-12-21">21.12.2017</date>\n\t\t</block>\n\t</container>\n\t<container name="procedureIdentifier">\n\t\t<p><docketNumber refersTo="~_procedure_2012_412">2012/0412 (COD)</docketNumber></p>\n\t</container>\n\t<longTitle>\n\t\t<p><docStage>Proposal for a</docStage>\n\t\t\t<docType refersTo="~_REG">REGULATION OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL</docType>\n\t\t\t<docPurpose>amending <

In [42]:
def send_fireworks_completion_request(model, prompt, api_key, max_tokens=4096,
                                temperature=0.6, top_p=1, top_k=40,
                                frequency_penalty=0,
                                presence_penalty=0,
                                timeout=300,
                                ):
    """POST a text-completion request to the Fireworks REST API.

    Args:
        model: Fireworks model identifier, sent as the "model" field.
        prompt: Prompt text, sent unchanged as the "prompt" field.
        api_key: Fireworks API key, sent as a Bearer token.
        max_tokens: Value of the "max_tokens" payload field.
        temperature: Value of the "temperature" payload field.
        top_p: Value of the "top_p" payload field.
        top_k: Value of the "top_k" payload field.
        frequency_penalty: Value of the "frequency_penalty" payload field.
        presence_penalty: Value of the "presence_penalty" payload field.
        timeout: Seconds passed to ``requests.post`` as its timeout. Without a
            timeout a stalled connection would block the kernel indefinitely.
            Pass ``None`` to wait forever.

    Returns:
        The decoded JSON body of the response. The HTTP status is not checked,
        so an error reply from the API is returned as its JSON body as well.

    Raises:
        requests.exceptions.Timeout: if the server does not answer in time.
        ValueError: if the response body is not valid JSON.
    """
    url = "https://api.fireworks.ai/inference/v1/completions"
    payload = {
        "model": model,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
        "presence_penalty": presence_penalty,
        "frequency_penalty": frequency_penalty,
        "prompt": prompt,
    }
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    return response.json()

In [45]:

# Single-string prompt for the plain completion endpoint. It carries the same
# content as the chat conversation in one text: the instructions, one worked
# example (plain text followed by its XML), the text to convert, and finally an
# open "XML OF COVER PAGE PLAIN TEXT" header for the model to continue from.
# NOTE: this rebinds `prompt`, the name the batch loop over shots also assigns.
prompt = """
You are AKN4EU XML formation converter. 
You receive plain text documents and convert them to XML. 
Return only the final XML document converted from the text.
The user gives some examples of XML documents.


#######################################

# PLAIN TEXT EXAMPLE #


EUROPEAN COMMISSION
Brussels, 21.12.2016
2016/0411 (COD)
Proposal for a
REGULATION OF THE EUROPEAN PARLIAMENT AND OF THE
COUNCIL
amending Regulation (EC)
No 1008/2008 on common rules for the operation of air services in the
Community
EN


#######################################

# XML OF TEXT EXAMPLE #

<coverPage>
	<container name="logo">
		<p><img src="EC.png" alt="EUROPEAN COMMISSION"/></p>
	</container>
	<container name="actingEntity">
		<p><organization refersTo="~_COM">EUROPEAN COMMISSION</organization></p>
	</container>
	<container name="mainDoc">
		<block name="placeAndDate">
			<location refersTo="~_BEL_BRU">Brussels</location>, <date date="2016-12-21">21.12.2016</date>
		</block>
	</container>
	<container name="procedureIdentifier">
		<p><docketNumber refersTo="~_procedure_2016_411">2016/0411 (COD)</docketNumber></p>
	</container>
	<longTitle>
		<p><docStage>Proposal for a</docStage>
			<docType refersTo="~_REG">REGULATION OF THE EUROPEAN PARLIAMENT AND OF THE
				COUNCIL</docType>
			<docPurpose>amending <ref href="http://data.europa.eu/eli/reg/2008/1008">Regulation (EC)
					No 1008/2008 on common rules for the operation of air services in the
				Community</ref></docPurpose></p>
	</longTitle>
	<container name="mainDocLanguage">
		<p><inline name="language" refersTo="~_FRBRlanguage">EN</inline></p>
	</container>
</coverPage>


#############

# TEXT TO CONVERT #

EUROPEAN COMMISSION
Brussels, 21.12.2017
2012/0412 (COD)
Proposal for a
REGULATION OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL amending Regulation (EC)
No 1009/2009 on common rules for the operation of air services in the Community
EN


#######################################

# XML OF COVER PAGE PLAIN TEXT #
"""


In [46]:
# Send the single-string prompt to Mixtral and show the raw JSON reply
response = send_fireworks_completion_request(
    model="accounts/fireworks/models/mixtral-8x7b-instruct",
    prompt=prompt,
    api_key=api_key,
)
print(response)

{'id': '4eeb7a9c-bf52-43c7-94eb-a9ed14feaba3', 'object': 'text_completion', 'created': 1712078198, 'model': 'accounts/fireworks/models/mixtral-8x7b-instruct', 'choices': [{'index': 0, 'text': '<coverPage>\n\t<container name="logo">\n\t\t<p><img src="EC.png" alt="EUROPEAN COMMISSION"/></p>\n\t</container>\n\t<container name="actingEntity">\n\t\t<p><organization refersTo="~_COM">EUROPEAN COMMISSION</organization></p>\n\t</container>\n\t<container name="mainDoc">\n\t\t<block name="placeAndDate">\n\t\t\t<location refersTo="~_BEL_BRU">Brussels</location>, <date date="2017-12-21">21.12.2017</date>\n\t\t</block>\n\t</container>\n\t<container name="procedureIdentifier">\n\t\t<p><docketNumber refersTo="~_procedure_2012_412">2012/0412 (COD)</docketNumber></p>\n\t</container>\n\t<longTitle>\n\t\t<p><docStage>Proposal for a</docStage>\n\t\t\t<docType refersTo="~_REG">REGULATION OF THE EUROPEAN PARLIAMENT AND OF THE\n\t\t\t\tCOUNCIL</docType>\n\t\t\t<docPurpose>amending <ref href="http://data.europ

### XML Extraction from results

In [None]:
import os
import re

def extract_cover_page_xml(text, start_tag="<coverPage>", end_tag="</coverPage>"):
    """Return the last complete ``start_tag ... end_tag`` element found in ``text``.

    The last occurrence of ``end_tag`` is located first; the element then starts
    at the last ``start_tag`` that precedes it. Any text outside the element
    (model chatter, a truncated trailing element) is dropped.

    Args:
        text: Raw model output to search.
        start_tag: Opening tag, matched literally.
        end_tag: Closing tag, matched literally.

    Returns:
        The element including both tags, or ``None`` when ``text`` holds no
        complete element.
    """
    end_pos = text.rfind(end_tag)
    if end_pos == -1:
        return None
    # Only accept an opening tag that sits before the closing tag; otherwise a
    # dangling, unterminated element at the end would yield an empty slice.
    start_pos = text.rfind(start_tag, 0, end_pos)
    if start_pos == -1:
        return None
    return text[start_pos:end_pos + len(end_tag)]


base_dir = 'data/prompts'
shots = [0, 1, 2]
result_suffix = '.fireworks.ai.txt'

for shot in shots:
    results_path = os.path.join(base_dir, f'{shot}-shot', 'results')
    results_xml_path = os.path.join(base_dir, f'{shot}-shot', 'results-xml')

    # Nothing has been generated for this shot yet
    if not os.path.isdir(results_path):
        continue

    # Ensure the results-xml directory exists
    os.makedirs(results_xml_path, exist_ok=True)

    # Loop through each result file in the results directory (sorted so the
    # processing order and log output are deterministic)
    for result_file in sorted(os.listdir(results_path)):
        if not result_file.endswith(result_suffix):
            continue

        file_path = os.path.join(results_path, result_file)
        with open(file_path, 'r') as file:
            result_content = file.read()

        xml_content = extract_cover_page_xml(result_content)

        # Skip files without a complete element instead of writing an empty
        # or garbage .xml file
        if xml_content is None:
            print(f"No XML content found in {result_file}")
            continue

        # Swap only the trailing .txt extension for .xml
        xml_file_name = result_file[:-len('.txt')] + '.xml'
        xml_file_path = os.path.join(results_xml_path, xml_file_name)

        # Writes the XML content to a new file in the results-xml directory
        with open(xml_file_path, 'w') as xml_file:
            xml_file.write(xml_content)
        print(f'Extracted and saved XML for {xml_file_name}')