Native diffusers textual embeddings loading

#2
by multimodalart - opened
Files changed (1)
  1. handler.py +7 -10
handler.py CHANGED
@@ -5,7 +5,7 @@ from huggingface_hub import hf_hub_download
 from diffusers import DiffusionPipeline
 import base64
 from io import BytesIO
-from cog_sdxl.dataset_and_utils import TokenEmbeddingsHandler
+from safetensors.torch import load_file


 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -24,15 +24,14 @@ class EndpointHandler:

         self.pipe.load_lora_weights("SvenN/sdxl-emoji", weight_name="lora.safetensors")
         self.pipe.fuse_lora()
-
-        text_encoders = [self.pipe.text_encoder, self.pipe.text_encoder_2]
-        tokenizers = [self.pipe.tokenizer, self.pipe.tokenizer_2]

         embedding_path = hf_hub_download(
             repo_id="SvenN/sdxl-emoji", filename="embeddings.pti", repo_type="model"
         )
-        embhandler = TokenEmbeddingsHandler(text_encoders, tokenizers)
-        embhandler.load_embeddings(embedding_path)
+        state_dict = load_file(embedding_path)
+
+        self.pipe.load_textual_inversion(state_dict["text_encoders_0"], token=["<s0>", "<s1>"], text_encoder=self.pipe.text_encoder, tokenizer=self.pipe.tokenizer)
+        self.pipe.load_textual_inversion(state_dict["text_encoders_1"], token=["<s0>", "<s1>"], text_encoder=self.pipe.text_encoder_2, tokenizer=self.pipe.tokenizer_2)

     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
         """
@@ -45,11 +44,9 @@ class EndpointHandler:
         inputs = data.pop("inputs", data)

         # Automatically add trigger tokens to the beginning of the prompt
-        full_prompt = f"A <s0><s1> emoji {inputs}"
         images = self.pipe(
-            full_prompt,
-            cross_attention_kwargs={"scale": 0.8},
-            num_inference_steps=25
+            inputs,
+            **data['parameters']
         ).images
         image = images[0]
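
For context, this is roughly what the new loading path looks like end to end, outside the endpoint handler. A minimal sketch, assuming the pipeline is the SDXL base checkpoint (`stabilityai/stable-diffusion-xl-base-1.0`, which is not shown in this diff) running on a GPU, and that `embeddings.pti` is a safetensors file keyed by `text_encoders_0` / `text_encoders_1`, as the handler expects:

```python
import torch
from diffusers import DiffusionPipeline
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

# Base pipeline; the actual checkpoint used by the handler is defined outside this diff.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
).to("cuda")

# Style LoRA, fused into the model weights.
pipe.load_lora_weights("SvenN/sdxl-emoji", weight_name="lora.safetensors")
pipe.fuse_lora()

# Pivotal-tuning embeddings: a safetensors file with one tensor per SDXL text encoder.
embedding_path = hf_hub_download(
    repo_id="SvenN/sdxl-emoji", filename="embeddings.pti", repo_type="model"
)
state_dict = load_file(embedding_path)

# Register <s0>/<s1> with each (text_encoder, tokenizer) pair natively in diffusers.
pipe.load_textual_inversion(
    state_dict["text_encoders_0"],
    token=["<s0>", "<s1>"],
    text_encoder=pipe.text_encoder,
    tokenizer=pipe.tokenizer,
)
pipe.load_textual_inversion(
    state_dict["text_encoders_1"],
    token=["<s0>", "<s1>"],
    text_encoder=pipe.text_encoder_2,
    tokenizer=pipe.tokenizer_2,
)

# The trigger tokens now have to appear in the prompt explicitly.
image = pipe(
    "A <s0><s1> emoji of a llama",
    cross_attention_kwargs={"scale": 0.8},
    num_inference_steps=25,
).images[0]
image.save("emoji.png")
```

Two `load_textual_inversion` calls are needed because SDXL has two text encoders, so the pivotal-tuning embeddings carry one tensor per encoder and each must be registered with its own tokenizer.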
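
On the request side, the `__call__` change drops the hard-coded `full_prompt` and sampling settings and instead forwards whatever arrives under `parameters` straight to the pipeline, so the caller now supplies both the trigger tokens and the generation options (and `data['parameters']` is required by the new code path). A hypothetical client payload; the endpoint URL and token are placeholders:

```python
import requests

# Placeholders: fill in your own Inference Endpoint URL and HF token.
API_URL = "https://<your-endpoint>.endpoints.huggingface.cloud"
HEADERS = {"Authorization": "Bearer <HF_TOKEN>", "Content-Type": "application/json"}

payload = {
    # Trigger tokens go in the prompt itself now.
    "inputs": "A <s0><s1> emoji of a man",
    # Forwarded verbatim to the pipeline via **data['parameters'].
    "parameters": {
        "cross_attention_kwargs": {"scale": 0.8},
        "num_inference_steps": 25,
    },
}

response = requests.post(API_URL, headers=HEADERS, json=payload)
result = response.json()
# The handler encodes the generated image with base64/BytesIO; the exact response
# key is defined in the rest of handler.py, which this diff does not show.
```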